// SPDX-License-Identifier: GPL-2.0-only
/* Kernel module to match AH parameters. */

/* (C) 1999-2000 Yon Uriarte <yon@astaro.de> */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>

#include <linux/netfilter_ipv4/ipt_ah.h>
#include <linux/netfilter/x_tables.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match");

/* Returns 1 if the spi is matched by the range, 0 otherwise */
static inline bool
spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
{
	bool r;

	pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
		 invert ? '!' : ' ', min, spi, max);
	r = (spi >= min && spi <= max) ^ invert;
	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
	return r;
}

static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	struct ip_auth_hdr _ahdr;
	const struct ip_auth_hdr *ah;
	const struct ipt_ah *ahinfo = par->matchinfo;

	/* Must not be a fragment. */
	if (par->fragoff != 0)
		return false;

	ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr);
	if (ah == NULL) {
		/* We've been asked to examine this packet, and we
		 * can't. Hence, no choice but to drop.
		 */
		pr_debug("Dropping evil AH tinygram.\n");
		par->hotdrop = true;
		return false;
	}

	return spi_match(ahinfo->spis[0], ahinfo->spis[1],
			 ntohl(ah->spi),
			 !!(ahinfo->invflags & IPT_AH_INV_SPI));
}

static int ah_mt_check(const struct xt_mtchk_param *par)
{
	const struct ipt_ah *ahinfo = par->matchinfo;

	/* Must specify no unknown invflags */
	if (ahinfo->invflags & ~IPT_AH_INV_MASK) {
		pr_debug("unknown flags %X\n", ahinfo->invflags);
		return -EINVAL;
	}
	return 0;
}

static struct xt_match ah_mt_reg __read_mostly = {
	.name		= "ah",
	.family		= NFPROTO_IPV4,
	.match		= ah_mt,
	.matchsize	= sizeof(struct ipt_ah),
	.proto		= IPPROTO_AH,
	.checkentry	= ah_mt_check,
	.me		= THIS_MODULE,
};

static int __init ah_mt_init(void)
{
	return xt_register_match(&ah_mt_reg);
}

static void __exit ah_mt_exit(void)
{
	xt_unregister_match(&ah_mt_reg);
}

module_init(ah_mt_init);
module_exit(ah_mt_exit);
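The match decision above reduces to the range-plus-invert test in spi_match(): the SPI is accepted when it lies in [spis[0], spis[1]], and IPT_AH_INV_SPI flips that verdict. A minimal standalone sketch of the same predicate, with a hypothetical test harness that is not part of the module, assuming the usual iptables semantics of "--ahspi min:max" and "! --ahspi":

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same test as spi_match(): in-range XOR invert. */
static bool spi_in_range(uint32_t min, uint32_t max, uint32_t spi, bool invert)
{
	return (spi >= min && spi <= max) ^ invert;
}

int main(void)
{
	printf("%d\n", spi_in_range(100, 200, 150, false));	/* 1: matches --ahspi 100:200 */
	printf("%d\n", spi_in_range(100, 200, 150, true));	/* 0: rejected by ! --ahspi 100:200 */
	return 0;
}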
// SPDX-License-Identifier: GPL-2.0-or-later /* * "TEE" target extension for Xtables * Copyright © Sebastian Claßen, 2007 * Jan Engelhardt, 2007-2010 * * based on ipt_ROUTE.c from Cédric de Launois * <delaunois@info.ucl.be> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/route.h> #include <linux/netfilter/x_tables.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/route.h> #include <net/netfilter/ipv4/nf_dup_ipv4.h> #include <net/netfilter/ipv6/nf_dup_ipv6.h> #include <linux/netfilter/xt_TEE.h> struct xt_tee_priv { struct list_head list; struct xt_tee_tginfo *tginfo; int oif; }; static unsigned int tee_net_id __read_mostly; static const union nf_inet_addr tee_zero_address; struct tee_net { struct list_head priv_list; /* lock protects the priv_list */ struct mutex lock; }; static unsigned int tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? info->priv->oif : 0; nf_dup_ipv4(xt_net(par), skb, xt_hooknum(par), &info->gw.in, oif); return XT_CONTINUE; } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static unsigned int tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ?
info->priv->oif : 0; nf_dup_ipv6(xt_net(par), skb, xt_hooknum(par), &info->gw.in6, oif); return XT_CONTINUE; } #endif static int tee_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct tee_net *tn = net_generic(net, tee_net_id); struct xt_tee_priv *priv; mutex_lock(&tn->lock); list_for_each_entry(priv, &tn->priv_list, list) { switch (event) { case NETDEV_REGISTER: if (!strcmp(dev->name, priv->tginfo->oif)) priv->oif = dev->ifindex; break; case NETDEV_UNREGISTER: if (dev->ifindex == priv->oif) priv->oif = -1; break; case NETDEV_CHANGENAME: if (!strcmp(dev->name, priv->tginfo->oif)) priv->oif = dev->ifindex; else if (dev->ifindex == priv->oif) priv->oif = -1; break; } } mutex_unlock(&tn->lock); return NOTIFY_DONE; } static int tee_tg_check(const struct xt_tgchk_param *par) { struct tee_net *tn = net_generic(par->net, tee_net_id); struct xt_tee_tginfo *info = par->targinfo; struct xt_tee_priv *priv; /* 0.0.0.0 and :: not allowed */ if (memcmp(&info->gw, &tee_zero_address, sizeof(tee_zero_address)) == 0) return -EINVAL; if (info->oif[0]) { struct net_device *dev; if (info->oif[sizeof(info->oif)-1] != '\0') return -EINVAL; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (priv == NULL) return -ENOMEM; priv->tginfo = info; priv->oif = -1; info->priv = priv; dev = dev_get_by_name(par->net, info->oif); if (dev) { priv->oif = dev->ifindex; dev_put(dev); } mutex_lock(&tn->lock); list_add(&priv->list, &tn->priv_list); mutex_unlock(&tn->lock); } else info->priv = NULL; static_key_slow_inc(&xt_tee_enabled); return 0; } static void tee_tg_destroy(const struct xt_tgdtor_param *par) { struct tee_net *tn = net_generic(par->net, tee_net_id); struct xt_tee_tginfo *info = par->targinfo; if (info->priv) { mutex_lock(&tn->lock); list_del(&info->priv->list); mutex_unlock(&tn->lock); kfree(info->priv); } static_key_slow_dec(&xt_tee_enabled); } static struct xt_target tee_tg_reg[] __read_mostly = { { .name = "TEE", .revision = 1, .family = NFPROTO_IPV4, .target = tee_tg4, .targetsize = sizeof(struct xt_tee_tginfo), .usersize = offsetof(struct xt_tee_tginfo, priv), .checkentry = tee_tg_check, .destroy = tee_tg_destroy, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "TEE", .revision = 1, .family = NFPROTO_IPV6, .target = tee_tg6, .targetsize = sizeof(struct xt_tee_tginfo), .usersize = offsetof(struct xt_tee_tginfo, priv), .checkentry = tee_tg_check, .destroy = tee_tg_destroy, .me = THIS_MODULE, }, #endif }; static int __net_init tee_net_init(struct net *net) { struct tee_net *tn = net_generic(net, tee_net_id); INIT_LIST_HEAD(&tn->priv_list); mutex_init(&tn->lock); return 0; } static struct pernet_operations tee_net_ops = { .init = tee_net_init, .id = &tee_net_id, .size = sizeof(struct tee_net), }; static struct notifier_block tee_netdev_notifier = { .notifier_call = tee_netdev_event, }; static int __init tee_tg_init(void) { int ret; ret = register_pernet_subsys(&tee_net_ops); if (ret < 0) return ret; ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); if (ret < 0) goto cleanup_subsys; ret = register_netdevice_notifier(&tee_netdev_notifier); if (ret < 0) goto unregister_targets; return 0; unregister_targets: xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); cleanup_subsys: unregister_pernet_subsys(&tee_net_ops); return ret; } static void __exit tee_tg_exit(void) { unregister_netdevice_notifier(&tee_netdev_notifier); xt_unregister_targets(tee_tg_reg, 
ARRAY_SIZE(tee_tg_reg)); unregister_pernet_subsys(&tee_net_ops); } module_init(tee_tg_init); module_exit(tee_tg_exit); MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>"); MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: Reroute packet copy"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_TEE"); MODULE_ALIAS("ip6t_TEE");
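tee_netdev_event() is what keeps the cached priv->oif consistent with the interface name stored in the rule: registration of, or a rename onto, the configured name caches the new ifindex, while unregistration (or a rename away from it) resets the cache to -1 so duplication falls back to routing without a fixed output interface. Below is a self-contained userspace sketch of that bookkeeping only; the struct, enum and function names are illustrative stand-ins for the netdev notifier machinery, not kernel API:

#include <stdio.h>
#include <string.h>

struct tee_rule {
	char oifname[16];	/* interface name given when the rule was added */
	int oif;		/* cached ifindex, -1 while unresolved */
};

enum netdev_ev { EV_REGISTER, EV_UNREGISTER, EV_CHANGENAME };

static void tee_dev_event(struct tee_rule *r, enum netdev_ev ev,
			  const char *name, int ifindex)
{
	switch (ev) {
	case EV_REGISTER:
		if (!strcmp(name, r->oifname))
			r->oif = ifindex;
		break;
	case EV_UNREGISTER:
		if (ifindex == r->oif)
			r->oif = -1;
		break;
	case EV_CHANGENAME:
		if (!strcmp(name, r->oifname))
			r->oif = ifindex;
		else if (ifindex == r->oif)
			r->oif = -1;
		break;
	}
}

int main(void)
{
	struct tee_rule r = { .oifname = "eth1", .oif = -1 };

	tee_dev_event(&r, EV_REGISTER, "eth1", 3);	/* cache ifindex 3 */
	tee_dev_event(&r, EV_CHANGENAME, "eth9", 3);	/* renamed away -> -1 */
	printf("oif=%d\n", r.oif);
	return 0;
}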
/* * linux/drivers/video/fbcmap.c -- Colormap handling for frame buffer devices * * Created 15 Jun 1997 by Geert Uytterhoeven * * 2001 - Documented with DocBook * - Brad Douglas <brad@neruo.com> * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive for * more details.
*/ #include <linux/string.h> #include <linux/module.h> #include <linux/fb.h> #include <linux/slab.h> #include <linux/uaccess.h> static u16 red2[] __read_mostly = { 0x0000, 0xaaaa }; static u16 green2[] __read_mostly = { 0x0000, 0xaaaa }; static u16 blue2[] __read_mostly = { 0x0000, 0xaaaa }; static u16 red4[] __read_mostly = { 0x0000, 0xaaaa, 0x5555, 0xffff }; static u16 green4[] __read_mostly = { 0x0000, 0xaaaa, 0x5555, 0xffff }; static u16 blue4[] __read_mostly = { 0x0000, 0xaaaa, 0x5555, 0xffff }; static u16 red8[] __read_mostly = { 0x0000, 0x0000, 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; static u16 green8[] __read_mostly = { 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0x0000, 0x0000, 0x5555, 0xaaaa }; static u16 blue8[] __read_mostly = { 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa }; static u16 red16[] __read_mostly = { 0x0000, 0x0000, 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0x5555, 0x5555, 0x5555, 0x5555, 0xffff, 0xffff, 0xffff, 0xffff }; static u16 green16[] __read_mostly = { 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0x0000, 0x0000, 0x5555, 0xaaaa, 0x5555, 0x5555, 0xffff, 0xffff, 0x5555, 0x5555, 0xffff, 0xffff }; static u16 blue16[] __read_mostly = { 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0xffff }; static const struct fb_cmap default_2_colors = { .len=2, .red=red2, .green=green2, .blue=blue2 }; static const struct fb_cmap default_8_colors = { .len=8, .red=red8, .green=green8, .blue=blue8 }; static const struct fb_cmap default_4_colors = { .len=4, .red=red4, .green=green4, .blue=blue4 }; static const struct fb_cmap default_16_colors = { .len=16, .red=red16, .green=green16, .blue=blue16 }; /** * fb_alloc_cmap_gfp - allocate a colormap * @cmap: frame buffer colormap structure * @len: length of @cmap * @transp: boolean, 1 if there is transparency, 0 otherwise * @flags: flags for kmalloc memory allocation * * Allocates memory for a colormap @cmap. @len is the * number of entries in the palette. * * Returns negative errno on error, or zero on success. * */ int fb_alloc_cmap_gfp(struct fb_cmap *cmap, int len, int transp, gfp_t flags) { int size = len * sizeof(u16); int ret = -ENOMEM; flags |= __GFP_NOWARN; if (cmap->len != len) { fb_dealloc_cmap(cmap); if (!len) return 0; cmap->red = kzalloc(size, flags); if (!cmap->red) goto fail; cmap->green = kzalloc(size, flags); if (!cmap->green) goto fail; cmap->blue = kzalloc(size, flags); if (!cmap->blue) goto fail; if (transp) { cmap->transp = kzalloc(size, flags); if (!cmap->transp) goto fail; } else { cmap->transp = NULL; } } cmap->start = 0; cmap->len = len; ret = fb_copy_cmap(fb_default_cmap(len), cmap); if (ret) goto fail; return 0; fail: fb_dealloc_cmap(cmap); return ret; } int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp) { return fb_alloc_cmap_gfp(cmap, len, transp, GFP_ATOMIC); } /** * fb_dealloc_cmap - deallocate a colormap * @cmap: frame buffer colormap structure * * Deallocates a colormap that was previously allocated with * fb_alloc_cmap(). * */ void fb_dealloc_cmap(struct fb_cmap *cmap) { kfree(cmap->red); kfree(cmap->green); kfree(cmap->blue); kfree(cmap->transp); cmap->red = cmap->green = cmap->blue = cmap->transp = NULL; cmap->len = 0; } /** * fb_copy_cmap - copy a colormap * @from: frame buffer colormap structure * @to: frame buffer colormap structure * * Copy contents of colormap from @from to @to. 
*/ int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) { unsigned int tooff = 0, fromoff = 0; size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; if (fromoff >= from->len || tooff >= to->len) return -EINVAL; size = min_t(size_t, to->len - tooff, from->len - fromoff); if (size == 0) return -EINVAL; size *= sizeof(u16); memcpy(to->red+tooff, from->red+fromoff, size); memcpy(to->green+tooff, from->green+fromoff, size); memcpy(to->blue+tooff, from->blue+fromoff, size); if (from->transp && to->transp) memcpy(to->transp+tooff, from->transp+fromoff, size); return 0; } int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to) { unsigned int tooff = 0, fromoff = 0; size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; if (fromoff >= from->len || tooff >= to->len) return -EINVAL; size = min_t(size_t, to->len - tooff, from->len - fromoff); if (size == 0) return -EINVAL; size *= sizeof(u16); if (copy_to_user(to->red+tooff, from->red+fromoff, size)) return -EFAULT; if (copy_to_user(to->green+tooff, from->green+fromoff, size)) return -EFAULT; if (copy_to_user(to->blue+tooff, from->blue+fromoff, size)) return -EFAULT; if (from->transp && to->transp) if (copy_to_user(to->transp+tooff, from->transp+fromoff, size)) return -EFAULT; return 0; } /** * fb_set_cmap - set the colormap * @cmap: frame buffer colormap structure * @info: frame buffer info structure * * Sets the colormap @cmap for a screen of device @info. * * Returns negative errno on error, or zero on success. * */ int fb_set_cmap(struct fb_cmap *cmap, struct fb_info *info) { int i, start, rc = 0; u16 *red, *green, *blue, *transp; u_int hred, hgreen, hblue, htransp = 0xffff; red = cmap->red; green = cmap->green; blue = cmap->blue; transp = cmap->transp; start = cmap->start; if (start < 0 || (!info->fbops->fb_setcolreg && !info->fbops->fb_setcmap)) return -EINVAL; if (info->fbops->fb_setcmap) { rc = info->fbops->fb_setcmap(cmap, info); } else { for (i = 0; i < cmap->len; i++) { hred = *red++; hgreen = *green++; hblue = *blue++; if (transp) htransp = *transp++; if (info->fbops->fb_setcolreg(start++, hred, hgreen, hblue, htransp, info)) break; } } if (rc == 0) fb_copy_cmap(cmap, &info->cmap); return rc; } int fb_set_user_cmap(struct fb_cmap_user *cmap, struct fb_info *info) { int rc, size = cmap->len * sizeof(u16); struct fb_cmap umap; if (size < 0 || size < cmap->len) return -E2BIG; memset(&umap, 0, sizeof(struct fb_cmap)); rc = fb_alloc_cmap_gfp(&umap, cmap->len, cmap->transp != NULL, GFP_KERNEL); if (rc) return rc; if (copy_from_user(umap.red, cmap->red, size) || copy_from_user(umap.green, cmap->green, size) || copy_from_user(umap.blue, cmap->blue, size) || (cmap->transp && copy_from_user(umap.transp, cmap->transp, size))) { rc = -EFAULT; goto out; } umap.start = cmap->start; lock_fb_info(info); rc = fb_set_cmap(&umap, info); unlock_fb_info(info); out: fb_dealloc_cmap(&umap); return rc; } /** * fb_default_cmap - get default colormap * @len: size of palette for a depth * * Gets the default colormap for a specific screen depth. @len * is the size of the palette for a particular screen depth. * * Returns pointer to a frame buffer colormap structure. 
* */ const struct fb_cmap *fb_default_cmap(int len) { if (len <= 2) return &default_2_colors; if (len <= 4) return &default_4_colors; if (len <= 8) return &default_8_colors; return &default_16_colors; } /** * fb_invert_cmaps - invert all default colormaps * * Invert all default colormaps. * */ void fb_invert_cmaps(void) { u_int i; for (i = 0; i < ARRAY_SIZE(red2); i++) { red2[i] = ~red2[i]; green2[i] = ~green2[i]; blue2[i] = ~blue2[i]; } for (i = 0; i < ARRAY_SIZE(red4); i++) { red4[i] = ~red4[i]; green4[i] = ~green4[i]; blue4[i] = ~blue4[i]; } for (i = 0; i < ARRAY_SIZE(red8); i++) { red8[i] = ~red8[i]; green8[i] = ~green8[i]; blue8[i] = ~blue8[i]; } for (i = 0; i < ARRAY_SIZE(red16); i++) { red16[i] = ~red16[i]; green16[i] = ~green16[i]; blue16[i] = ~blue16[i]; } } /* * Visible symbols for modules */ EXPORT_SYMBOL(fb_alloc_cmap); EXPORT_SYMBOL(fb_dealloc_cmap); EXPORT_SYMBOL(fb_copy_cmap); EXPORT_SYMBOL(fb_set_cmap); EXPORT_SYMBOL(fb_default_cmap); EXPORT_SYMBOL(fb_invert_cmaps);
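fb_copy_cmap() and fb_cmap_to_user() above share one piece of arithmetic: whichever map starts later fixes the offset into the other, and the copy length is the overlap of what remains of each. A standalone sketch of just that windowing computation (the helper name and harness are illustrative, not part of the fbdev API):

#include <stdio.h>
#include <stddef.h>

/* Returns how many entries to copy (0 if the maps don't overlap) and
 * reports the start index in each map via *fromoff / *tooff. */
static size_t cmap_overlap(unsigned int from_start, size_t from_len,
			   unsigned int to_start, size_t to_len,
			   size_t *fromoff, size_t *tooff)
{
	size_t a, b;

	*fromoff = *tooff = 0;
	if (to_start > from_start)
		*fromoff = to_start - from_start;
	else
		*tooff = from_start - to_start;

	if (*fromoff >= from_len || *tooff >= to_len)
		return 0;

	a = to_len - *tooff;
	b = from_len - *fromoff;
	return a < b ? a : b;
}

int main(void)
{
	size_t fromoff, tooff;
	/* source covers entries 0..255, destination covers 16..31: copy 16 */
	size_t n = cmap_overlap(0, 256, 16, 16, &fromoff, &tooff);

	printf("%zu entries, fromoff=%zu tooff=%zu\n", n, fromoff, tooff);
	return 0;
}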
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 FORTH-ICS/CARV
 *  Nick Kossifidis <mick@ics.forth.gr>
 */

#include <linux/kexec.h>
#include <asm/kexec.h>		/* For riscv_kexec_* symbol defines */
#include <linux/smp.h>		/* For smp_send_stop() */
#include <asm/cacheflush.h>	/* For local_flush_icache_all() */
#include <asm/barrier.h>	/* For smp_wmb() */
#include <asm/page.h>		/* For PAGE_MASK */
#include <linux/libfdt.h>	/* For fdt_check_header() */
#include <asm/set_memory.h>	/* For set_memory_x() */
#include <linux/compiler.h>	/* For unreachable() */
#include <linux/cpu.h>		/* For cpu_down() */
#include <linux/reboot.h>
#include <linux/interrupt.h>
#include <linux/irq.h>

/*
 * machine_kexec_prepare - Initialize kexec
 *
 * This function is called from do_kexec_load, when the user has
 * provided us with an image to be loaded. Its goal is to validate
 * the image and prepare the control code buffer as needed.
 * Note that kimage_alloc_init has already been called and the
 * control buffer has already been allocated.
 */
int machine_kexec_prepare(struct kimage *image)
{
	struct kimage_arch *internal = &image->arch;
	struct fdt_header fdt = {0};
	void *control_code_buffer = NULL;
	unsigned int control_code_buffer_sz = 0;
	int i = 0;

	/* Find the Flattened Device Tree and save its physical address */
	for (i = 0; i < image->nr_segments; i++) {
		if (image->segment[i].memsz <= sizeof(fdt))
			continue;

		if (image->file_mode)
			memcpy(&fdt, image->segment[i].buf, sizeof(fdt));
		else if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt)))
			continue;

		if (fdt_check_header(&fdt))
			continue;

		internal->fdt_addr = (unsigned long) image->segment[i].mem;
		break;
	}

	if (!internal->fdt_addr) {
		pr_err("Device tree not included in the provided image\n");
		return -EINVAL;
	}

	/* Copy the assembler code for relocation to the control page */
	if (image->type != KEXEC_TYPE_CRASH) {
		control_code_buffer = page_address(image->control_code_page);
		control_code_buffer_sz = page_size(image->control_code_page);

		if (unlikely(riscv_kexec_relocate_size > control_code_buffer_sz)) {
			pr_err("Relocation code doesn't fit within a control page\n");
			return -EINVAL;
		}

		memcpy(control_code_buffer, riscv_kexec_relocate,
		       riscv_kexec_relocate_size);

		/* Mark the control page executable */
		set_memory_x((unsigned long) control_code_buffer, 1);
	}

	return 0;
}

/*
 * machine_kexec_cleanup - Cleanup any leftovers from
 *			   machine_kexec_prepare
 *
 * This function is called by kimage_free to handle any arch-specific
 * allocations done on machine_kexec_prepare. Since we didn't do any
 * allocations there, this is just an empty function. Note that the
 * control buffer is freed by kimage_free.
 */
void machine_kexec_cleanup(struct kimage *image)
{
}

/*
 * machine_shutdown - Prepare for a kexec reboot
 *
 * This function is called by kernel_kexec just before machine_kexec
 * below. Its goal is to prepare the rest of the system (the other
 * harts and possibly devices etc) for a kexec reboot.
 */
void machine_shutdown(void)
{
	/*
	 * No more interrupts on this hart
	 * until we are back up.
	 */
	local_irq_disable();

#if defined(CONFIG_HOTPLUG_CPU)
	smp_shutdown_nonboot_cpus(smp_processor_id());
#endif
}

/*
 * machine_crash_shutdown - Prepare to kexec after a kernel crash
 *
 * This function is called by crash_kexec just before machine_kexec
 * and its goal is to shutdown non-crashing cpus and save registers.
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
	local_irq_disable();

	/* shutdown non-crashing cpus */
	crash_smp_send_stop();

	crash_save_cpu(regs, smp_processor_id());
	machine_kexec_mask_interrupts();

	pr_info("Starting crashdump kernel...\n");
}

/*
 * machine_kexec - Jump to the loaded kimage
 *
 * This function is called by kernel_kexec which is called by the
 * reboot system call when the reboot cmd is LINUX_REBOOT_CMD_KEXEC,
 * or by crash_kernel which is called by the kernel's arch-specific
 * trap handler in case of a kernel panic. It's the final stage of
 * the kexec process where the pre-loaded kimage is ready to be
 * executed. We assume at this point that all other harts are
 * suspended and this hart will be the new boot hart.
 */
void __noreturn machine_kexec(struct kimage *image)
{
	struct kimage_arch *internal = &image->arch;
	unsigned long jump_addr = (unsigned long) image->start;
	unsigned long first_ind_entry = (unsigned long) &image->head;
	unsigned long this_cpu_id = __smp_processor_id();
	unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id);
	unsigned long fdt_addr = internal->fdt_addr;
	void *control_code_buffer = page_address(image->control_code_page);
	riscv_kexec_method kexec_method = NULL;

#ifdef CONFIG_SMP
	WARN(smp_crash_stop_failed(),
	     "Some CPUs may be stale, kdump will be unreliable.\n");
#endif

	if (image->type != KEXEC_TYPE_CRASH)
		kexec_method = control_code_buffer;
	else
		kexec_method = (riscv_kexec_method) &riscv_kexec_norelocate;

	pr_notice("Will call new kernel at %08lx from hart id %lx\n",
		  jump_addr, this_hart_id);
	pr_notice("FDT image at %08lx\n", fdt_addr);

	/* Make sure the relocation code is visible to the hart */
	local_flush_icache_all();

	/* Jump to the relocation code */
	pr_notice("Bye...\n");
	kexec_method(first_ind_entry, jump_addr, fdt_addr,
		     this_hart_id, kernel_map.va_pa_offset);
	unreachable();
}
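machine_kexec_prepare() above locates the device tree by reading the first bytes of each segment and accepting the first one that passes fdt_check_header(). Below is a reduced userspace sketch of that scan; the segment struct is a stand-in for the kimage segment array, and the magic-only check is a simplification of what libfdt's fdt_check_header() verifies (a flattened device tree begins with the big-endian magic 0xd00dfeed):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct seg {
	const void *buf;	/* segment contents */
	size_t memsz;		/* size in memory */
	unsigned long mem;	/* load address */
};

static int looks_like_fdt(const void *buf, size_t len)
{
	const uint8_t *p = buf;

	return len > 4 && p[0] == 0xd0 && p[1] == 0x0d &&
	       p[2] == 0xfe && p[3] == 0xed;
}

/* Returns the load address of the first FDT-looking segment, or 0. */
static unsigned long find_fdt(const struct seg *s, int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		if (looks_like_fdt(s[i].buf, s[i].memsz))
			return s[i].mem;
	return 0;
}

int main(void)
{
	static const uint8_t dtb[] = { 0xd0, 0x0d, 0xfe, 0xed, 0, 0, 0, 0 };
	struct seg segs[] = {
		{ "vmlinux", 8, 0x80200000UL },
		{ dtb, sizeof(dtb), 0x82000000UL },
	};

	printf("fdt at %#lx\n", find_fdt(segs, 2));
	return 0;
}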
// SPDX-License-Identifier: GPL-2.0 /* USB Driver for Sierra Wireless Copyright (C) 2006, 2007, 2008 Kevin Lloyd <klloyd@sierrawireless.com>, Copyright (C) 2008, 2009 Elina Pasheva, Matthew Safar, Rory Filer <linux@sierrawireless.com> IMPORTANT DISCLAIMER: This driver is not commercially supported by Sierra Wireless. Use at your own risk. Portions based on the option driver by Matthias Urlichs <smurf@smurf.noris.de> Whom based his on the Keyspan driver by Hugh Blemings <hugh@blemings.org> */ /* Uncomment to log function calls */ /* #define DEBUG */ #define DRIVER_AUTHOR "Kevin Lloyd, Elina Pasheva, Matthew Safar, Rory Filer" #define DRIVER_DESC "USB Driver for Sierra Wireless USB modems" #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/errno.h> #include <linux/tty.h> #include <linux/slab.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/usb/serial.h> #define SWIMS_USB_REQUEST_SetPower 0x00 #define SWIMS_USB_REQUEST_SetNmea 0x07 #define N_IN_URB_HM 8 #define N_OUT_URB_HM 64 #define N_IN_URB 4 #define N_OUT_URB 4 #define IN_BUFLEN 4096 #define MAX_TRANSFER (PAGE_SIZE - 512) /* MAX_TRANSFER is chosen so that the VM is not stressed by allocations > PAGE_SIZE and the number of packets in a page is an integer 512 is the largest possible packet on EHCI */ static bool nmea; struct sierra_iface_list { const u8 *nums; /* array of interface numbers */ size_t count; /* number of elements in array */ }; struct sierra_intf_private { spinlock_t susp_lock; unsigned int suspended:1; int in_flight; unsigned int open_ports; }; static int sierra_set_power_state(struct usb_device *udev, __u16 swiState) { return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), SWIMS_USB_REQUEST_SetPower, /* __u8 request */ USB_TYPE_VENDOR, /* __u8 request type */ swiState, /* __u16 value */ 0, /* __u16 index */ NULL, /* void *data */ 0, /* __u16 size */ USB_CTRL_SET_TIMEOUT); /* int timeout */ } static int sierra_vsc_set_nmea(struct usb_device *udev, __u16 enable) { return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), SWIMS_USB_REQUEST_SetNmea, /* __u8 request */ USB_TYPE_VENDOR, /* __u8 request type */ enable, /* __u16 value */ 0x0000, /* __u16 index */ NULL, /* void *data */ 0, /* __u16 size */ USB_CTRL_SET_TIMEOUT); /* int timeout */ } static int sierra_calc_num_ports(struct usb_serial *serial, struct usb_serial_endpoints *epds) { int num_ports = 0; u8 ifnum, numendpoints; ifnum = serial->interface->cur_altsetting->desc.bInterfaceNumber; numendpoints = serial->interface->cur_altsetting->desc.bNumEndpoints; /* Dummy interface present on some SKUs should be ignored */ if (ifnum == 0x99) num_ports = 0; else if (numendpoints <= 3) num_ports = 1; else num_ports = (numendpoints-1)/2; return num_ports; } static bool is_listed(const u8 ifnum, const struct sierra_iface_list *list) { int
i; if (!list) return false; for (i = 0; i < list->count; i++) { if (list->nums[i] == ifnum) return true; } return false; } static u8 sierra_interface_num(struct usb_serial *serial) { return serial->interface->cur_altsetting->desc.bInterfaceNumber; } static int sierra_probe(struct usb_serial *serial, const struct usb_device_id *id) { const struct sierra_iface_list *ignore_list; int result = 0; struct usb_device *udev; u8 ifnum; udev = serial->dev; ifnum = sierra_interface_num(serial); /* * If this interface supports more than 1 alternate * select the 2nd one */ if (serial->interface->num_altsetting == 2) { dev_dbg(&udev->dev, "Selecting alt setting for interface %d\n", ifnum); /* We know the alternate setting is 1 for the MC8785 */ usb_set_interface(udev, ifnum, 1); } ignore_list = (const struct sierra_iface_list *)id->driver_info; if (is_listed(ifnum, ignore_list)) { dev_dbg(&serial->dev->dev, "Ignoring interface #%d\n", ifnum); return -ENODEV; } return result; } /* interfaces with higher memory requirements */ static const u8 hi_memory_typeA_ifaces[] = { 0, 2 }; static const struct sierra_iface_list typeA_interface_list = { .nums = hi_memory_typeA_ifaces, .count = ARRAY_SIZE(hi_memory_typeA_ifaces), }; static const u8 hi_memory_typeB_ifaces[] = { 3, 4, 5, 6 }; static const struct sierra_iface_list typeB_interface_list = { .nums = hi_memory_typeB_ifaces, .count = ARRAY_SIZE(hi_memory_typeB_ifaces), }; /* 'ignorelist' of interfaces not served by this driver */ static const u8 direct_ip_non_serial_ifaces[] = { 7, 8, 9, 10, 11, 19, 20 }; static const struct sierra_iface_list direct_ip_interface_ignore = { .nums = direct_ip_non_serial_ifaces, .count = ARRAY_SIZE(direct_ip_non_serial_ifaces), }; static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0F3D, 0x0112) }, /* Airprime/Sierra PC 5220 */ { USB_DEVICE(0x03F0, 0x1B1D) }, /* HP ev2200 a.k.a MC5720 */ { USB_DEVICE(0x03F0, 0x211D) }, /* HP ev2210 a.k.a MC5725 */ { USB_DEVICE(0x03F0, 0x1E1D) }, /* HP hs2300 a.k.a MC8775 */ { USB_DEVICE(0x1199, 0x0017) }, /* Sierra Wireless EM5625 */ { USB_DEVICE(0x1199, 0x0018) }, /* Sierra Wireless MC5720 */ { USB_DEVICE(0x1199, 0x0218) }, /* Sierra Wireless MC5720 */ { USB_DEVICE(0x1199, 0x0020) }, /* Sierra Wireless MC5725 */ { USB_DEVICE(0x1199, 0x0220) }, /* Sierra Wireless MC5725 */ { USB_DEVICE(0x1199, 0x0022) }, /* Sierra Wireless EM5725 */ { USB_DEVICE(0x1199, 0x0024) }, /* Sierra Wireless MC5727 */ { USB_DEVICE(0x1199, 0x0224) }, /* Sierra Wireless MC5727 */ { USB_DEVICE(0x1199, 0x0019) }, /* Sierra Wireless AirCard 595 */ { USB_DEVICE(0x1199, 0x0021) }, /* Sierra Wireless AirCard 597E */ { USB_DEVICE(0x1199, 0x0112) }, /* Sierra Wireless AirCard 580 */ { USB_DEVICE(0x1199, 0x0120) }, /* Sierra Wireless USB Dongle 595U */ { USB_DEVICE(0x1199, 0x0301) }, /* Sierra Wireless USB Dongle 250U */ /* Sierra Wireless C597 */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x0023, 0xFF, 0xFF, 0xFF) }, /* Sierra Wireless T598 */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x0025, 0xFF, 0xFF, 0xFF) }, { USB_DEVICE(0x1199, 0x0026) }, /* Sierra Wireless T11 */ { USB_DEVICE(0x1199, 0x0027) }, /* Sierra Wireless AC402 */ { USB_DEVICE(0x1199, 0x0028) }, /* Sierra Wireless MC5728 */ { USB_DEVICE(0x1199, 0x0029) }, /* Sierra Wireless Device */ { USB_DEVICE(0x1199, 0x6802) }, /* Sierra Wireless MC8755 */ { USB_DEVICE(0x1199, 0x6803) }, /* Sierra Wireless MC8765 */ { USB_DEVICE(0x1199, 0x6804) }, /* Sierra Wireless MC8755 */ { USB_DEVICE(0x1199, 0x6805) }, /* Sierra Wireless MC8765 */ { USB_DEVICE(0x1199, 0x6808) }, /* 
Sierra Wireless MC8755 */ { USB_DEVICE(0x1199, 0x6809) }, /* Sierra Wireless MC8765 */ { USB_DEVICE(0x1199, 0x6812) }, /* Sierra Wireless MC8775 & AC 875U */ { USB_DEVICE(0x1199, 0x6813) }, /* Sierra Wireless MC8775 */ { USB_DEVICE(0x1199, 0x6815) }, /* Sierra Wireless MC8775 */ { USB_DEVICE(0x1199, 0x6816) }, /* Sierra Wireless MC8775 */ { USB_DEVICE(0x1199, 0x6820) }, /* Sierra Wireless AirCard 875 */ { USB_DEVICE(0x1199, 0x6821) }, /* Sierra Wireless AirCard 875U */ { USB_DEVICE(0x1199, 0x6822) }, /* Sierra Wireless AirCard 875E */ { USB_DEVICE(0x1199, 0x6832) }, /* Sierra Wireless MC8780 */ { USB_DEVICE(0x1199, 0x6833) }, /* Sierra Wireless MC8781 */ { USB_DEVICE(0x1199, 0x6834) }, /* Sierra Wireless MC8780 */ { USB_DEVICE(0x1199, 0x6835) }, /* Sierra Wireless MC8781 */ { USB_DEVICE(0x1199, 0x6838) }, /* Sierra Wireless MC8780 */ { USB_DEVICE(0x1199, 0x6839) }, /* Sierra Wireless MC8781 */ { USB_DEVICE(0x1199, 0x683A) }, /* Sierra Wireless MC8785 */ { USB_DEVICE(0x1199, 0x683B) }, /* Sierra Wireless MC8785 Composite */ /* Sierra Wireless MC8790, MC8791, MC8792 Composite */ { USB_DEVICE(0x1199, 0x683C) }, { USB_DEVICE(0x1199, 0x683D) }, /* Sierra Wireless MC8791 Composite */ /* Sierra Wireless MC8790, MC8791, MC8792 */ { USB_DEVICE(0x1199, 0x683E) }, { USB_DEVICE(0x1199, 0x6850) }, /* Sierra Wireless AirCard 880 */ { USB_DEVICE(0x1199, 0x6851) }, /* Sierra Wireless AirCard 881 */ { USB_DEVICE(0x1199, 0x6852) }, /* Sierra Wireless AirCard 880 E */ { USB_DEVICE(0x1199, 0x6853) }, /* Sierra Wireless AirCard 881 E */ { USB_DEVICE(0x1199, 0x6855) }, /* Sierra Wireless AirCard 880 U */ { USB_DEVICE(0x1199, 0x6856) }, /* Sierra Wireless AirCard 881 U */ { USB_DEVICE(0x1199, 0x6859) }, /* Sierra Wireless AirCard 885 E */ { USB_DEVICE(0x1199, 0x685A) }, /* Sierra Wireless AirCard 885 E */ /* Sierra Wireless C885 */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x6880, 0xFF, 0xFF, 0xFF)}, /* Sierra Wireless C888, Air Card 501, USB 303, USB 304 */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x6890, 0xFF, 0xFF, 0xFF)}, /* Sierra Wireless C22/C33 */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x6891, 0xFF, 0xFF, 0xFF)}, /* Sierra Wireless HSPA Non-Composite Device */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x6892, 0xFF, 0xFF, 0xFF)}, { USB_DEVICE(0x1199, 0x6893) }, /* Sierra Wireless Device */ /* Sierra Wireless Direct IP modems */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68A3, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_ignore }, { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68AA, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_ignore }, { USB_DEVICE(0x1199, 0x68AB) }, /* Sierra Wireless AR8550 */ /* AT&T Direct IP LTE modems */ { USB_DEVICE_AND_INTERFACE_INFO(0x0F3D, 0x68AA, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_ignore }, /* Airprime/Sierra Wireless Direct IP modems */ { USB_DEVICE_AND_INTERFACE_INFO(0x0F3D, 0x68A3, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_ignore }, { } }; MODULE_DEVICE_TABLE(usb, id_table); struct sierra_port_private { spinlock_t lock; /* lock the structure */ int outstanding_urbs; /* number of out urbs in flight */ struct usb_anchor active; struct usb_anchor delayed; int num_out_urbs; int num_in_urbs; /* Input endpoints and buffers for this port */ struct urb *in_urbs[N_IN_URB_HM]; /* Settings for the port */ int rts_state; /* Handshaking pins (outputs) */ int dtr_state; int cts_state; /* Handshaking pins (inputs) */ int dsr_state; int dcd_state; int ri_state; }; static int 
sierra_send_setup(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct sierra_port_private *portdata; __u16 interface = 0; int val = 0; int do_send = 0; int retval; portdata = usb_get_serial_port_data(port); if (portdata->dtr_state) val |= 0x01; if (portdata->rts_state) val |= 0x02; /* If composite device then properly report interface */ if (serial->num_ports == 1) { interface = sierra_interface_num(serial); /* Control message is sent only to interfaces with * interrupt_in endpoints */ if (port->interrupt_in_urb) { /* send control message */ do_send = 1; } } /* Otherwise the need to do non-composite mapping */ else { if (port->bulk_out_endpointAddress == 2) interface = 0; else if (port->bulk_out_endpointAddress == 4) interface = 1; else if (port->bulk_out_endpointAddress == 5) interface = 2; do_send = 1; } if (!do_send) return 0; retval = usb_autopm_get_interface(serial->interface); if (retval < 0) return retval; retval = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), 0x22, 0x21, val, interface, NULL, 0, USB_CTRL_SET_TIMEOUT); usb_autopm_put_interface(serial->interface); return retval; } static int sierra_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; unsigned int value; struct sierra_port_private *portdata; portdata = usb_get_serial_port_data(port); value = ((portdata->rts_state) ? TIOCM_RTS : 0) | ((portdata->dtr_state) ? TIOCM_DTR : 0) | ((portdata->cts_state) ? TIOCM_CTS : 0) | ((portdata->dsr_state) ? TIOCM_DSR : 0) | ((portdata->dcd_state) ? TIOCM_CAR : 0) | ((portdata->ri_state) ? TIOCM_RNG : 0); return value; } static int sierra_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct sierra_port_private *portdata; portdata = usb_get_serial_port_data(port); if (set & TIOCM_RTS) portdata->rts_state = 1; if (set & TIOCM_DTR) portdata->dtr_state = 1; if (clear & TIOCM_RTS) portdata->rts_state = 0; if (clear & TIOCM_DTR) portdata->dtr_state = 0; return sierra_send_setup(port); } static void sierra_release_urb(struct urb *urb) { if (urb) { kfree(urb->transfer_buffer); usb_free_urb(urb); } } static void sierra_outdat_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; struct sierra_port_private *portdata = usb_get_serial_port_data(port); struct sierra_intf_private *intfdata; int status = urb->status; unsigned long flags; intfdata = usb_get_serial_data(port->serial); /* free up the transfer buffer, as usb_free_urb() does not do this */ kfree(urb->transfer_buffer); usb_autopm_put_interface_async(port->serial->interface); if (status) dev_dbg(&port->dev, "%s - nonzero write bulk status " "received: %d\n", __func__, status); spin_lock_irqsave(&portdata->lock, flags); --portdata->outstanding_urbs; spin_unlock_irqrestore(&portdata->lock, flags); spin_lock_irqsave(&intfdata->susp_lock, flags); --intfdata->in_flight; spin_unlock_irqrestore(&intfdata->susp_lock, flags); usb_serial_port_softint(port); } /* Write */ static int sierra_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count) { struct sierra_port_private *portdata; struct sierra_intf_private *intfdata; struct usb_serial *serial = port->serial; unsigned long flags; unsigned char *buffer; struct urb *urb; size_t writesize = min_t(size_t, count, MAX_TRANSFER); int retval = 0; /* verify that we actually have some data to write */ if (count == 0) return 0; portdata = usb_get_serial_port_data(port); intfdata = 
usb_get_serial_data(serial); dev_dbg(&port->dev, "%s: write (%zd bytes)\n", __func__, writesize); spin_lock_irqsave(&portdata->lock, flags); dev_dbg(&port->dev, "%s - outstanding_urbs: %d\n", __func__, portdata->outstanding_urbs); if (portdata->outstanding_urbs > portdata->num_out_urbs) { spin_unlock_irqrestore(&portdata->lock, flags); dev_dbg(&port->dev, "%s - write limit hit\n", __func__); return 0; } portdata->outstanding_urbs++; dev_dbg(&port->dev, "%s - 1, outstanding_urbs: %d\n", __func__, portdata->outstanding_urbs); spin_unlock_irqrestore(&portdata->lock, flags); retval = usb_autopm_get_interface_async(serial->interface); if (retval < 0) { spin_lock_irqsave(&portdata->lock, flags); portdata->outstanding_urbs--; spin_unlock_irqrestore(&portdata->lock, flags); goto error_simple; } buffer = kmemdup(buf, writesize, GFP_ATOMIC); if (!buffer) { retval = -ENOMEM; goto error_no_buffer; } urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { retval = -ENOMEM; goto error_no_urb; } usb_serial_debug_data(&port->dev, __func__, writesize, buffer); usb_fill_bulk_urb(urb, serial->dev, usb_sndbulkpipe(serial->dev, port->bulk_out_endpointAddress), buffer, writesize, sierra_outdat_callback, port); /* Handle the need to send a zero length packet */ urb->transfer_flags |= URB_ZERO_PACKET; spin_lock_irqsave(&intfdata->susp_lock, flags); if (intfdata->suspended) { usb_anchor_urb(urb, &portdata->delayed); spin_unlock_irqrestore(&intfdata->susp_lock, flags); goto skip_power; } else { usb_anchor_urb(urb, &portdata->active); } /* send it down the pipe */ retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) { usb_unanchor_urb(urb); spin_unlock_irqrestore(&intfdata->susp_lock, flags); dev_err(&port->dev, "%s - usb_submit_urb(write bulk) failed " "with status = %d\n", __func__, retval); goto error; } else { intfdata->in_flight++; spin_unlock_irqrestore(&intfdata->susp_lock, flags); } skip_power: /* we are done with this urb, so let the host driver * really free it when it is finished with it */ usb_free_urb(urb); return writesize; error: usb_free_urb(urb); error_no_urb: kfree(buffer); error_no_buffer: spin_lock_irqsave(&portdata->lock, flags); --portdata->outstanding_urbs; dev_dbg(&port->dev, "%s - 2. outstanding_urbs: %d\n", __func__, portdata->outstanding_urbs); spin_unlock_irqrestore(&portdata->lock, flags); usb_autopm_put_interface_async(serial->interface); error_simple: return retval; } static void sierra_indat_callback(struct urb *urb) { int err; int endpoint; struct usb_serial_port *port; unsigned char *data = urb->transfer_buffer; int status = urb->status; endpoint = usb_pipeendpoint(urb->pipe); port = urb->context; if (status) { dev_dbg(&port->dev, "%s: nonzero status: %d on" " endpoint %02x\n", __func__, status, endpoint); } else { if (urb->actual_length) { tty_insert_flip_string(&port->port, data, urb->actual_length); tty_flip_buffer_push(&port->port); usb_serial_debug_data(&port->dev, __func__, urb->actual_length, data); } else { dev_dbg(&port->dev, "%s: empty read urb" " received\n", __func__); } } /* Resubmit urb so we continue receiving */ if (status != -ESHUTDOWN && status != -EPERM) { usb_mark_last_busy(port->serial->dev); err = usb_submit_urb(urb, GFP_ATOMIC); if (err && err != -EPERM) dev_err(&port->dev, "resubmit read urb failed." 
"(%d)\n", err); } } static void sierra_instat_callback(struct urb *urb) { int err; int status = urb->status; struct usb_serial_port *port = urb->context; struct sierra_port_private *portdata = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; dev_dbg(&port->dev, "%s: urb %p port %p has data %p\n", __func__, urb, port, portdata); if (status == 0) { struct usb_ctrlrequest *req_pkt = urb->transfer_buffer; if (!req_pkt) { dev_dbg(&port->dev, "%s: NULL req_pkt\n", __func__); return; } if ((req_pkt->bRequestType == 0xA1) && (req_pkt->bRequest == 0x20)) { int old_dcd_state; unsigned char signals = *((unsigned char *) urb->transfer_buffer + sizeof(struct usb_ctrlrequest)); dev_dbg(&port->dev, "%s: signal x%x\n", __func__, signals); old_dcd_state = portdata->dcd_state; portdata->cts_state = 1; portdata->dcd_state = ((signals & 0x01) ? 1 : 0); portdata->dsr_state = ((signals & 0x02) ? 1 : 0); portdata->ri_state = ((signals & 0x08) ? 1 : 0); if (old_dcd_state && !portdata->dcd_state) tty_port_tty_hangup(&port->port, true); } else { dev_dbg(&port->dev, "%s: type %x req %x\n", __func__, req_pkt->bRequestType, req_pkt->bRequest); } } else dev_dbg(&port->dev, "%s: error %d\n", __func__, status); /* Resubmit urb so we continue receiving IRQ data */ if (status != -ESHUTDOWN && status != -ENOENT) { usb_mark_last_busy(serial->dev); err = usb_submit_urb(urb, GFP_ATOMIC); if (err && err != -EPERM) dev_err(&port->dev, "%s: resubmit intr urb " "failed. (%d)\n", __func__, err); } } static unsigned int sierra_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct sierra_port_private *portdata = usb_get_serial_port_data(port); unsigned long flags; /* try to give a good number back based on if we have any free urbs at * this point in time */ spin_lock_irqsave(&portdata->lock, flags); if (portdata->outstanding_urbs > (portdata->num_out_urbs * 2) / 3) { spin_unlock_irqrestore(&portdata->lock, flags); dev_dbg(&port->dev, "%s - write limit hit\n", __func__); return 0; } spin_unlock_irqrestore(&portdata->lock, flags); return 2048; } static unsigned int sierra_chars_in_buffer(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct sierra_port_private *portdata = usb_get_serial_port_data(port); unsigned long flags; unsigned int chars; /* NOTE: This overcounts somewhat. 
*/ spin_lock_irqsave(&portdata->lock, flags); chars = portdata->outstanding_urbs * MAX_TRANSFER; spin_unlock_irqrestore(&portdata->lock, flags); dev_dbg(&port->dev, "%s - %u\n", __func__, chars); return chars; } static void sierra_stop_rx_urbs(struct usb_serial_port *port) { int i; struct sierra_port_private *portdata = usb_get_serial_port_data(port); for (i = 0; i < portdata->num_in_urbs; i++) usb_kill_urb(portdata->in_urbs[i]); usb_kill_urb(port->interrupt_in_urb); } static int sierra_submit_rx_urbs(struct usb_serial_port *port, gfp_t mem_flags) { int ok_cnt; int err = -EINVAL; int i; struct urb *urb; struct sierra_port_private *portdata = usb_get_serial_port_data(port); ok_cnt = 0; for (i = 0; i < portdata->num_in_urbs; i++) { urb = portdata->in_urbs[i]; if (!urb) continue; err = usb_submit_urb(urb, mem_flags); if (err) { dev_err(&port->dev, "%s: submit urb failed: %d\n", __func__, err); } else { ok_cnt++; } } if (ok_cnt && port->interrupt_in_urb) { err = usb_submit_urb(port->interrupt_in_urb, mem_flags); if (err) { dev_err(&port->dev, "%s: submit intr urb failed: %d\n", __func__, err); } } if (ok_cnt > 0) /* at least one rx urb submitted */ return 0; else return err; } static struct urb *sierra_setup_urb(struct usb_serial *serial, int endpoint, int dir, void *ctx, int len, gfp_t mem_flags, usb_complete_t callback) { struct urb *urb; u8 *buf; urb = usb_alloc_urb(0, mem_flags); if (!urb) return NULL; buf = kmalloc(len, mem_flags); if (buf) { /* Fill URB using supplied data */ usb_fill_bulk_urb(urb, serial->dev, usb_sndbulkpipe(serial->dev, endpoint) | dir, buf, len, callback, ctx); dev_dbg(&serial->dev->dev, "%s %c u : %p d:%p\n", __func__, dir == USB_DIR_IN ? 'i' : 'o', urb, buf); } else { sierra_release_urb(urb); urb = NULL; } return urb; } static void sierra_close(struct usb_serial_port *port) { int i; struct usb_serial *serial = port->serial; struct sierra_port_private *portdata; struct sierra_intf_private *intfdata = usb_get_serial_data(serial); struct urb *urb; portdata = usb_get_serial_port_data(port); /* * Need to take susp_lock to make sure port is not already being * resumed, but no need to hold it due to the tty-port initialized * flag. 
*/ spin_lock_irq(&intfdata->susp_lock); if (--intfdata->open_ports == 0) serial->interface->needs_remote_wakeup = 0; spin_unlock_irq(&intfdata->susp_lock); for (;;) { urb = usb_get_from_anchor(&portdata->delayed); if (!urb) break; kfree(urb->transfer_buffer); usb_free_urb(urb); usb_autopm_put_interface_async(serial->interface); spin_lock_irq(&portdata->lock); portdata->outstanding_urbs--; spin_unlock_irq(&portdata->lock); } sierra_stop_rx_urbs(port); usb_kill_anchored_urbs(&portdata->active); for (i = 0; i < portdata->num_in_urbs; i++) { sierra_release_urb(portdata->in_urbs[i]); portdata->in_urbs[i] = NULL; } usb_autopm_get_interface_no_resume(serial->interface); } static int sierra_open(struct tty_struct *tty, struct usb_serial_port *port) { struct sierra_port_private *portdata; struct usb_serial *serial = port->serial; struct sierra_intf_private *intfdata = usb_get_serial_data(serial); int i; int err; int endpoint; struct urb *urb; portdata = usb_get_serial_port_data(port); endpoint = port->bulk_in_endpointAddress; for (i = 0; i < portdata->num_in_urbs; i++) { urb = sierra_setup_urb(serial, endpoint, USB_DIR_IN, port, IN_BUFLEN, GFP_KERNEL, sierra_indat_callback); portdata->in_urbs[i] = urb; } /* clear halt condition */ usb_clear_halt(serial->dev, usb_sndbulkpipe(serial->dev, endpoint) | USB_DIR_IN); err = sierra_submit_rx_urbs(port, GFP_KERNEL); if (err) goto err_submit; spin_lock_irq(&intfdata->susp_lock); if (++intfdata->open_ports == 1) serial->interface->needs_remote_wakeup = 1; spin_unlock_irq(&intfdata->susp_lock); usb_autopm_put_interface(serial->interface); return 0; err_submit: sierra_stop_rx_urbs(port); for (i = 0; i < portdata->num_in_urbs; i++) { sierra_release_urb(portdata->in_urbs[i]); portdata->in_urbs[i] = NULL; } return err; } static void sierra_dtr_rts(struct usb_serial_port *port, int on) { struct sierra_port_private *portdata; portdata = usb_get_serial_port_data(port); portdata->rts_state = on; portdata->dtr_state = on; sierra_send_setup(port); } static int sierra_startup(struct usb_serial *serial) { struct sierra_intf_private *intfdata; intfdata = kzalloc(sizeof(*intfdata), GFP_KERNEL); if (!intfdata) return -ENOMEM; spin_lock_init(&intfdata->susp_lock); usb_set_serial_data(serial, intfdata); /* Set Device mode to D0 */ sierra_set_power_state(serial->dev, 0x0000); /* Check NMEA and set */ if (nmea) sierra_vsc_set_nmea(serial->dev, 1); return 0; } static void sierra_release(struct usb_serial *serial) { struct sierra_intf_private *intfdata; intfdata = usb_get_serial_data(serial); kfree(intfdata); } static int sierra_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct sierra_port_private *portdata; const struct sierra_iface_list *himemory_list; u8 ifnum; portdata = kzalloc(sizeof(*portdata), GFP_KERNEL); if (!portdata) return -ENOMEM; spin_lock_init(&portdata->lock); init_usb_anchor(&portdata->active); init_usb_anchor(&portdata->delayed); /* Assume low memory requirements */ portdata->num_out_urbs = N_OUT_URB; portdata->num_in_urbs = N_IN_URB; /* Determine actual memory requirements */ if (serial->num_ports == 1) { /* Get interface number for composite device */ ifnum = sierra_interface_num(serial); himemory_list = &typeB_interface_list; } else { /* This is really the usb-serial port number of the interface * rather than the interface number. 
*/ ifnum = port->port_number; himemory_list = &typeA_interface_list; } if (is_listed(ifnum, himemory_list)) { portdata->num_out_urbs = N_OUT_URB_HM; portdata->num_in_urbs = N_IN_URB_HM; } dev_dbg(&port->dev, "Memory usage (urbs) interface #%d, in=%d, out=%d\n", ifnum, portdata->num_in_urbs, portdata->num_out_urbs); usb_set_serial_port_data(port, portdata); return 0; } static void sierra_port_remove(struct usb_serial_port *port) { struct sierra_port_private *portdata; portdata = usb_get_serial_port_data(port); usb_set_serial_port_data(port, NULL); kfree(portdata); } #ifdef CONFIG_PM static void stop_read_write_urbs(struct usb_serial *serial) { int i; struct usb_serial_port *port; struct sierra_port_private *portdata; /* Stop reading/writing urbs */ for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; portdata = usb_get_serial_port_data(port); if (!portdata) continue; sierra_stop_rx_urbs(port); usb_kill_anchored_urbs(&portdata->active); } } static int sierra_suspend(struct usb_serial *serial, pm_message_t message) { struct sierra_intf_private *intfdata = usb_get_serial_data(serial); spin_lock_irq(&intfdata->susp_lock); if (PMSG_IS_AUTO(message)) { if (intfdata->in_flight) { spin_unlock_irq(&intfdata->susp_lock); return -EBUSY; } } intfdata->suspended = 1; spin_unlock_irq(&intfdata->susp_lock); stop_read_write_urbs(serial); return 0; } /* Caller must hold susp_lock. */ static int sierra_submit_delayed_urbs(struct usb_serial_port *port) { struct sierra_port_private *portdata = usb_get_serial_port_data(port); struct sierra_intf_private *intfdata; struct urb *urb; int ec = 0; int err; intfdata = usb_get_serial_data(port->serial); for (;;) { urb = usb_get_from_anchor(&portdata->delayed); if (!urb) break; usb_anchor_urb(urb, &portdata->active); intfdata->in_flight++; err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { dev_err(&port->dev, "%s - submit urb failed: %d", __func__, err); ec++; intfdata->in_flight--; usb_unanchor_urb(urb); kfree(urb->transfer_buffer); usb_free_urb(urb); spin_lock(&portdata->lock); portdata->outstanding_urbs--; spin_unlock(&portdata->lock); } } if (ec) return -EIO; return 0; } static int sierra_resume(struct usb_serial *serial) { struct usb_serial_port *port; struct sierra_intf_private *intfdata = usb_get_serial_data(serial); int ec = 0; int i, err; spin_lock_irq(&intfdata->susp_lock); for (i = 0; i < serial->num_ports; i++) { port = serial->port[i]; if (!tty_port_initialized(&port->port)) continue; err = sierra_submit_delayed_urbs(port); if (err) ec++; err = sierra_submit_rx_urbs(port, GFP_ATOMIC); if (err) ec++; } intfdata->suspended = 0; spin_unlock_irq(&intfdata->susp_lock); return ec ? 
-EIO : 0; } #else #define sierra_suspend NULL #define sierra_resume NULL #endif static struct usb_serial_driver sierra_device = { .driver = { .name = "sierra", }, .description = "Sierra USB modem", .id_table = id_table, .calc_num_ports = sierra_calc_num_ports, .probe = sierra_probe, .open = sierra_open, .close = sierra_close, .dtr_rts = sierra_dtr_rts, .write = sierra_write, .write_room = sierra_write_room, .chars_in_buffer = sierra_chars_in_buffer, .tiocmget = sierra_tiocmget, .tiocmset = sierra_tiocmset, .attach = sierra_startup, .release = sierra_release, .port_probe = sierra_port_probe, .port_remove = sierra_port_remove, .suspend = sierra_suspend, .resume = sierra_resume, .read_int_callback = sierra_instat_callback, }; static struct usb_serial_driver * const serial_drivers[] = { &sierra_device, NULL }; module_usb_serial_driver(serial_drivers, id_table); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL v2"); module_param(nmea, bool, 0644); MODULE_PARM_DESC(nmea, "NMEA streaming");
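The write path above is flow-controlled purely by counting URBs in flight: sierra_write() refuses new data once outstanding_urbs passes num_out_urbs, and sierra_write_room() starts reporting 0 once more than two thirds of the URBs are outstanding, so the tty layer backs off before the hard limit. A toy model of that accounting (illustrative only, no locking or USB involved):

#include <stdio.h>

struct port_model {
	int outstanding_urbs;
	int num_out_urbs;	/* N_OUT_URB or N_OUT_URB_HM */
};

/* Same threshold as sierra_write_room(). */
static unsigned int write_room(const struct port_model *p)
{
	if (p->outstanding_urbs > (p->num_out_urbs * 2) / 3)
		return 0;
	return 2048;
}

int main(void)
{
	struct port_model p = { .outstanding_urbs = 0, .num_out_urbs = 4 };

	for (; p.outstanding_urbs <= p.num_out_urbs; p.outstanding_urbs++)
		printf("%d in flight -> room %u\n",
		       p.outstanding_urbs, write_room(&p));
	return 0;
}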
| 92 89 89 89 91 75 75 75 74 74 5 5 5 5 5 5 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 | // SPDX-License-Identifier: GPL-2.0 /* * DMA memory management for framework level HCD code (hc_driver) * * This implementation plugs in through generic "usb_bus" level methods, * and should work with all USB controllers, regardless of bus type. * * Released under the GPLv2 only. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/device.h> #include <linux/mm.h> #include <linux/io.h> #include <linux/dma-mapping.h> #include <linux/dmapool.h> #include <linux/genalloc.h> #include <linux/usb.h> #include <linux/usb/hcd.h> /* * DMA-Coherent Buffers */ /* FIXME tune these based on pool statistics ... */ static size_t pool_max[HCD_BUFFER_POOLS] = { 32, 128, 512, 2048, }; void __init usb_init_pool_max(void) { /* * The pool_max values must never be smaller than * ARCH_DMA_MINALIGN. */ if (ARCH_DMA_MINALIGN <= 32) ; /* Original value is okay */ else if (ARCH_DMA_MINALIGN <= 64) pool_max[0] = 64; else if (ARCH_DMA_MINALIGN <= 128) pool_max[0] = 0; /* Don't use this pool */ else BUILD_BUG(); /* We don't allow this */ } /* SETUP primitives */ /** * hcd_buffer_create - initialize buffer pools * @hcd: the bus whose buffer pools are to be initialized * * Context: task context, might sleep * * Call this as part of initializing a host controller that uses the dma * memory allocators. It initializes some pools of dma-coherent memory that * will be shared by all drivers using that controller. * * Call hcd_buffer_destroy() to clean up after using those pools. * * Return: 0 if successful. A negative errno value otherwise. */ int hcd_buffer_create(struct usb_hcd *hcd) { char name[16]; int i, size; if (hcd->localmem_pool || !hcd_uses_dma(hcd)) return 0; for (i = 0; i < HCD_BUFFER_POOLS; i++) { size = pool_max[i]; if (!size) continue; snprintf(name, sizeof(name), "buffer-%d", size); hcd->pool[i] = dma_pool_create(name, hcd->self.sysdev, size, size, 0); if (!hcd->pool[i]) { hcd_buffer_destroy(hcd); return -ENOMEM; } } return 0; } /** * hcd_buffer_destroy - deallocate buffer pools * @hcd: the bus whose buffer pools are to be destroyed * * Context: task context, might sleep * * This frees the buffer pools created by hcd_buffer_create(). */ void hcd_buffer_destroy(struct usb_hcd *hcd) { int i; if (!IS_ENABLED(CONFIG_HAS_DMA)) return; for (i = 0; i < HCD_BUFFER_POOLS; i++) { dma_pool_destroy(hcd->pool[i]); hcd->pool[i] = NULL; } } /* sometimes alloc/free could use kmalloc with GFP_DMA, for * better sharing and to leverage mm/slab.c intelligence. 
*/ void *hcd_buffer_alloc( struct usb_bus *bus, size_t size, gfp_t mem_flags, dma_addr_t *dma ) { struct usb_hcd *hcd = bus_to_hcd(bus); int i; if (size == 0) return NULL; if (hcd->localmem_pool) return gen_pool_dma_alloc(hcd->localmem_pool, size, dma); /* some USB hosts just use PIO */ if (!hcd_uses_dma(hcd)) { *dma = ~(dma_addr_t) 0; return kmalloc(size, mem_flags); } for (i = 0; i < HCD_BUFFER_POOLS; i++) { if (size <= pool_max[i]) return dma_pool_alloc(hcd->pool[i], mem_flags, dma); } return dma_alloc_coherent(hcd->self.sysdev, size, dma, mem_flags); } void hcd_buffer_free( struct usb_bus *bus, size_t size, void *addr, dma_addr_t dma ) { struct usb_hcd *hcd = bus_to_hcd(bus); int i; if (!addr) return; if (hcd->localmem_pool) { gen_pool_free(hcd->localmem_pool, (unsigned long)addr, size); return; } if (!hcd_uses_dma(hcd)) { kfree(addr); return; } for (i = 0; i < HCD_BUFFER_POOLS; i++) { if (size <= pool_max[i]) { dma_pool_free(hcd->pool[i], addr, dma); return; } } dma_free_coherent(hcd->self.sysdev, size, addr, dma); } void *hcd_buffer_alloc_pages(struct usb_hcd *hcd, size_t size, gfp_t mem_flags, dma_addr_t *dma) { if (size == 0) return NULL; if (hcd->localmem_pool) return gen_pool_dma_alloc_align(hcd->localmem_pool, size, dma, PAGE_SIZE); /* some USB hosts just use PIO */ if (!hcd_uses_dma(hcd)) { *dma = DMA_MAPPING_ERROR; return (void *)__get_free_pages(mem_flags, get_order(size)); } return dma_alloc_coherent(hcd->self.sysdev, size, dma, mem_flags); } void hcd_buffer_free_pages(struct usb_hcd *hcd, size_t size, void *addr, dma_addr_t dma) { if (!addr) return; if (hcd->localmem_pool) { gen_pool_free(hcd->localmem_pool, (unsigned long)addr, size); return; } if (!hcd_uses_dma(hcd)) { free_pages((unsigned long)addr, get_order(size)); return; } dma_free_coherent(hcd->self.sysdev, size, addr, dma); } |
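/*
 * Illustrative sketch (not kernel code): hcd_buffer_alloc()/hcd_buffer_free()
 * above reduce to "use the first size class that fits, otherwise fall back to
 * a generic DMA allocation". A minimal user-space analogue of that selection,
 * assuming the same pool_max table; pick_pool() and the demo main() are
 * hypothetical helpers, not part of the USB core API.
 */
#include <stdio.h>
#include <stddef.h>

#define DEMO_BUFFER_POOLS 4
static const size_t demo_pool_max[DEMO_BUFFER_POOLS] = { 32, 128, 512, 2048 };

/* Return the index of the smallest pool that can hold @size, or -1 when the
 * request is bigger than every size class (generic allocation path). */
static int pick_pool(size_t size)
{
	int i;

	for (i = 0; i < DEMO_BUFFER_POOLS; i++)
		if (size <= demo_pool_max[i])
			return i;
	return -1;
}

int main(void)
{
	const size_t sizes[] = { 8, 64, 512, 4096 };
	size_t i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("size %4zu -> pool %d\n", sizes[i], pick_pool(sizes[i]));
	return 0;
}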
/* * Copyright (c) 2014 Chelsio, Inc. All rights reserved. * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses.
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "iwpm_util.h" #define IWPM_MAPINFO_HASH_SIZE 512 #define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1) #define IWPM_REMINFO_HASH_SIZE 64 #define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1) #define IWPM_MSG_SIZE 512 static LIST_HEAD(iwpm_nlmsg_req_list); static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); static struct hlist_head *iwpm_hash_bucket; static DEFINE_SPINLOCK(iwpm_mapinfo_lock); static struct hlist_head *iwpm_reminfo_bucket; static DEFINE_SPINLOCK(iwpm_reminfo_lock); static struct iwpm_admin_data iwpm_admin; /** * iwpm_init - Allocate resources for the iwarp port mapper * @nl_client: The index of the netlink client * * Should be called when network interface goes up. */ int iwpm_init(u8 nl_client) { iwpm_hash_bucket = kcalloc(IWPM_MAPINFO_HASH_SIZE, sizeof(struct hlist_head), GFP_KERNEL); if (!iwpm_hash_bucket) return -ENOMEM; iwpm_reminfo_bucket = kcalloc(IWPM_REMINFO_HASH_SIZE, sizeof(struct hlist_head), GFP_KERNEL); if (!iwpm_reminfo_bucket) { kfree(iwpm_hash_bucket); return -ENOMEM; } iwpm_set_registration(nl_client, IWPM_REG_UNDEF); pr_debug("%s: Mapinfo and reminfo tables are created\n", __func__); return 0; } static void free_hash_bucket(void); static void free_reminfo_bucket(void); /** * iwpm_exit - Deallocate resources for the iwarp port mapper * @nl_client: The index of the netlink client * * Should be called when network interface goes down. 
*/ int iwpm_exit(u8 nl_client) { free_hash_bucket(); free_reminfo_bucket(); pr_debug("%s: Resources are destroyed\n", __func__); iwpm_set_registration(nl_client, IWPM_REG_UNDEF); return 0; } static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *, struct sockaddr_storage *); /** * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address * info in a hash table * @local_sockaddr: Local ip/tcp address * @mapped_sockaddr: Mapped local ip/tcp address * @nl_client: The index of the netlink client * @map_flags: IWPM mapping flags */ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, struct sockaddr_storage *mapped_sockaddr, u8 nl_client, u32 map_flags) { struct hlist_head *hash_bucket_head = NULL; struct iwpm_mapping_info *map_info; unsigned long flags; int ret = -EINVAL; map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL); if (!map_info) return -ENOMEM; memcpy(&map_info->local_sockaddr, local_sockaddr, sizeof(struct sockaddr_storage)); memcpy(&map_info->mapped_sockaddr, mapped_sockaddr, sizeof(struct sockaddr_storage)); map_info->nl_client = nl_client; map_info->map_flags = map_flags; spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { hash_bucket_head = get_mapinfo_hash_bucket( &map_info->local_sockaddr, &map_info->mapped_sockaddr); if (hash_bucket_head) { hlist_add_head(&map_info->hlist_node, hash_bucket_head); ret = 0; } } spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); if (!hash_bucket_head) kfree(map_info); return ret; } /** * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address * info from the hash table * @local_sockaddr: Local ip/tcp address * @mapped_local_addr: Mapped local ip/tcp address * * Returns err code if mapping info is not found in the hash table, * otherwise returns 0 */ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, struct sockaddr_storage *mapped_local_addr) { struct hlist_node *tmp_hlist_node; struct hlist_head *hash_bucket_head; struct iwpm_mapping_info *map_info = NULL; unsigned long flags; int ret = -EINVAL; spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { hash_bucket_head = get_mapinfo_hash_bucket( local_sockaddr, mapped_local_addr); if (!hash_bucket_head) goto remove_mapinfo_exit; hlist_for_each_entry_safe(map_info, tmp_hlist_node, hash_bucket_head, hlist_node) { if (!iwpm_compare_sockaddr(&map_info->mapped_sockaddr, mapped_local_addr)) { hlist_del_init(&map_info->hlist_node); kfree(map_info); ret = 0; break; } } } remove_mapinfo_exit: spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); return ret; } static void free_hash_bucket(void) { struct hlist_node *tmp_hlist_node; struct iwpm_mapping_info *map_info; unsigned long flags; int i; /* remove all the mapinfo data from the list */ spin_lock_irqsave(&iwpm_mapinfo_lock, flags); for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { hlist_for_each_entry_safe(map_info, tmp_hlist_node, &iwpm_hash_bucket[i], hlist_node) { hlist_del_init(&map_info->hlist_node); kfree(map_info); } } /* free the hash list */ kfree(iwpm_hash_bucket); iwpm_hash_bucket = NULL; spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); } static void free_reminfo_bucket(void) { struct hlist_node *tmp_hlist_node; struct iwpm_remote_info *rem_info; unsigned long flags; int i; /* remove all the remote info from the list */ spin_lock_irqsave(&iwpm_reminfo_lock, flags); for (i = 0; i < IWPM_REMINFO_HASH_SIZE; i++) { hlist_for_each_entry_safe(rem_info, tmp_hlist_node, &iwpm_reminfo_bucket[i], hlist_node) { hlist_del_init(&rem_info->hlist_node); 
kfree(rem_info); } } /* free the hash list */ kfree(iwpm_reminfo_bucket); iwpm_reminfo_bucket = NULL; spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); } static struct hlist_head *get_reminfo_hash_bucket(struct sockaddr_storage *, struct sockaddr_storage *); void iwpm_add_remote_info(struct iwpm_remote_info *rem_info) { struct hlist_head *hash_bucket_head; unsigned long flags; spin_lock_irqsave(&iwpm_reminfo_lock, flags); if (iwpm_reminfo_bucket) { hash_bucket_head = get_reminfo_hash_bucket( &rem_info->mapped_loc_sockaddr, &rem_info->mapped_rem_sockaddr); if (hash_bucket_head) hlist_add_head(&rem_info->hlist_node, hash_bucket_head); } spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); } /** * iwpm_get_remote_info - Get the remote connecting peer address info * * @mapped_loc_addr: Mapped local address of the listening peer * @mapped_rem_addr: Mapped remote address of the connecting peer * @remote_addr: To store the remote address of the connecting peer * @nl_client: The index of the netlink client * * The remote address info is retrieved and provided to the client in * the remote_addr. After that it is removed from the hash table */ int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, struct sockaddr_storage *mapped_rem_addr, struct sockaddr_storage *remote_addr, u8 nl_client) { struct hlist_node *tmp_hlist_node; struct hlist_head *hash_bucket_head; struct iwpm_remote_info *rem_info = NULL; unsigned long flags; int ret = -EINVAL; spin_lock_irqsave(&iwpm_reminfo_lock, flags); if (iwpm_reminfo_bucket) { hash_bucket_head = get_reminfo_hash_bucket( mapped_loc_addr, mapped_rem_addr); if (!hash_bucket_head) goto get_remote_info_exit; hlist_for_each_entry_safe(rem_info, tmp_hlist_node, hash_bucket_head, hlist_node) { if (!iwpm_compare_sockaddr(&rem_info->mapped_loc_sockaddr, mapped_loc_addr) && !iwpm_compare_sockaddr(&rem_info->mapped_rem_sockaddr, mapped_rem_addr)) { memcpy(remote_addr, &rem_info->remote_sockaddr, sizeof(struct sockaddr_storage)); iwpm_print_sockaddr(remote_addr, "get_remote_info: Remote sockaddr:"); hlist_del_init(&rem_info->hlist_node); kfree(rem_info); ret = 0; break; } } } get_remote_info_exit: spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); return ret; } struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, u8 nl_client, gfp_t gfp) { struct iwpm_nlmsg_request *nlmsg_request; unsigned long flags; nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp); if (!nlmsg_request) return NULL; spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list); spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); kref_init(&nlmsg_request->kref); kref_get(&nlmsg_request->kref); nlmsg_request->nlmsg_seq = nlmsg_seq; nlmsg_request->nl_client = nl_client; nlmsg_request->request_done = 0; nlmsg_request->err_code = 0; sema_init(&nlmsg_request->sem, 1); down(&nlmsg_request->sem); return nlmsg_request; } void iwpm_free_nlmsg_request(struct kref *kref) { struct iwpm_nlmsg_request *nlmsg_request; unsigned long flags; nlmsg_request = container_of(kref, struct iwpm_nlmsg_request, kref); spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); list_del_init(&nlmsg_request->inprocess_list); spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); if (!nlmsg_request->request_done) pr_debug("%s Freeing incomplete nlmsg request (seq = %u).\n", __func__, nlmsg_request->nlmsg_seq); kfree(nlmsg_request); } struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq) { struct iwpm_nlmsg_request *nlmsg_request; struct 
iwpm_nlmsg_request *found_request = NULL; unsigned long flags; spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); list_for_each_entry(nlmsg_request, &iwpm_nlmsg_req_list, inprocess_list) { if (nlmsg_request->nlmsg_seq == echo_seq) { found_request = nlmsg_request; kref_get(&nlmsg_request->kref); break; } } spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); return found_request; } int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request) { int ret; ret = down_timeout(&nlmsg_request->sem, IWPM_NL_TIMEOUT); if (ret) { ret = -EINVAL; pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n", __func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq); } else { ret = nlmsg_request->err_code; } kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); return ret; } int iwpm_get_nlmsg_seq(void) { return atomic_inc_return(&iwpm_admin.nlmsg_seq); } /* valid client */ u32 iwpm_get_registration(u8 nl_client) { return iwpm_admin.reg_list[nl_client]; } /* valid client */ void iwpm_set_registration(u8 nl_client, u32 reg) { iwpm_admin.reg_list[nl_client] = reg; } /* valid client */ u32 iwpm_check_registration(u8 nl_client, u32 reg) { return (iwpm_get_registration(nl_client) & reg); } int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, struct sockaddr_storage *b_sockaddr) { if (a_sockaddr->ss_family != b_sockaddr->ss_family) return 1; if (a_sockaddr->ss_family == AF_INET) { struct sockaddr_in *a4_sockaddr = (struct sockaddr_in *)a_sockaddr; struct sockaddr_in *b4_sockaddr = (struct sockaddr_in *)b_sockaddr; if (!memcmp(&a4_sockaddr->sin_addr, &b4_sockaddr->sin_addr, sizeof(struct in_addr)) && a4_sockaddr->sin_port == b4_sockaddr->sin_port) return 0; } else if (a_sockaddr->ss_family == AF_INET6) { struct sockaddr_in6 *a6_sockaddr = (struct sockaddr_in6 *)a_sockaddr; struct sockaddr_in6 *b6_sockaddr = (struct sockaddr_in6 *)b_sockaddr; if (!memcmp(&a6_sockaddr->sin6_addr, &b6_sockaddr->sin6_addr, sizeof(struct in6_addr)) && a6_sockaddr->sin6_port == b6_sockaddr->sin6_port) return 0; } else { pr_err("%s: Invalid sockaddr family\n", __func__); } return 1; } struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh, int nl_client) { struct sk_buff *skb = NULL; skb = dev_alloc_skb(IWPM_MSG_SIZE); if (!skb) goto create_nlmsg_exit; if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op, NLM_F_REQUEST))) { pr_warn("%s: Unable to put the nlmsg header\n", __func__); dev_kfree_skb(skb); skb = NULL; } create_nlmsg_exit: return skb; } int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max, const struct nla_policy *nlmsg_policy, struct nlattr *nltb[], const char *msg_type) { int nlh_len = 0; int ret; const char *err_str = ""; ret = nlmsg_validate_deprecated(cb->nlh, nlh_len, policy_max - 1, nlmsg_policy, NULL); if (ret) { err_str = "Invalid attribute"; goto parse_nlmsg_error; } ret = nlmsg_parse_deprecated(cb->nlh, nlh_len, nltb, policy_max - 1, nlmsg_policy, NULL); if (ret) { err_str = "Unable to parse the nlmsg"; goto parse_nlmsg_error; } ret = iwpm_validate_nlmsg_attr(nltb, policy_max); if (ret) { err_str = "Invalid NULL attribute"; goto parse_nlmsg_error; } return 0; parse_nlmsg_error: pr_warn("%s: %s (msg type %s ret = %d)\n", __func__, err_str, msg_type, ret); return ret; } void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg) { struct sockaddr_in6 *sockaddr_v6; struct sockaddr_in *sockaddr_v4; switch (sockaddr->ss_family) { case AF_INET: sockaddr_v4 = (struct sockaddr_in *)sockaddr; pr_debug("%s IPV4 %pI4: %u(0x%04X)\n", msg, &sockaddr_v4->sin_addr, 
ntohs(sockaddr_v4->sin_port), ntohs(sockaddr_v4->sin_port)); break; case AF_INET6: sockaddr_v6 = (struct sockaddr_in6 *)sockaddr; pr_debug("%s IPV6 %pI6: %u(0x%04X)\n", msg, &sockaddr_v6->sin6_addr, ntohs(sockaddr_v6->sin6_port), ntohs(sockaddr_v6->sin6_port)); break; default: break; } } static u32 iwpm_ipv6_jhash(struct sockaddr_in6 *ipv6_sockaddr) { u32 ipv6_hash = jhash(&ipv6_sockaddr->sin6_addr, sizeof(struct in6_addr), 0); u32 hash = jhash_2words(ipv6_hash, (__force u32) ipv6_sockaddr->sin6_port, 0); return hash; } static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr) { u32 ipv4_hash = jhash(&ipv4_sockaddr->sin_addr, sizeof(struct in_addr), 0); u32 hash = jhash_2words(ipv4_hash, (__force u32) ipv4_sockaddr->sin_port, 0); return hash; } static int get_hash_bucket(struct sockaddr_storage *a_sockaddr, struct sockaddr_storage *b_sockaddr, u32 *hash) { u32 a_hash, b_hash; if (a_sockaddr->ss_family == AF_INET) { a_hash = iwpm_ipv4_jhash((struct sockaddr_in *) a_sockaddr); b_hash = iwpm_ipv4_jhash((struct sockaddr_in *) b_sockaddr); } else if (a_sockaddr->ss_family == AF_INET6) { a_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) a_sockaddr); b_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) b_sockaddr); } else { pr_err("%s: Invalid sockaddr family\n", __func__); return -EINVAL; } if (a_hash == b_hash) /* if port mapper isn't available */ *hash = a_hash; else *hash = jhash_2words(a_hash, b_hash, 0); return 0; } static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *local_sockaddr, struct sockaddr_storage *mapped_sockaddr) { u32 hash; int ret; ret = get_hash_bucket(local_sockaddr, mapped_sockaddr, &hash); if (ret) return NULL; return &iwpm_hash_bucket[hash & IWPM_MAPINFO_HASH_MASK]; } static struct hlist_head *get_reminfo_hash_bucket(struct sockaddr_storage *mapped_loc_sockaddr, struct sockaddr_storage *mapped_rem_sockaddr) { u32 hash; int ret; ret = get_hash_bucket(mapped_loc_sockaddr, mapped_rem_sockaddr, &hash); if (ret) return NULL; return &iwpm_reminfo_bucket[hash & IWPM_REMINFO_HASH_MASK]; } static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) { struct sk_buff *skb = NULL; struct nlmsghdr *nlh; u32 msg_seq; const char *err_str = ""; int ret = -EINVAL; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_MAPINFO_NUM, &nlh, nl_client); if (!skb) { err_str = "Unable to create a nlmsg"; goto mapinfo_num_error; } nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); msg_seq = 0; err_str = "Unable to put attribute of mapinfo number nlmsg"; ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_MAPINFO_SEQ); if (ret) goto mapinfo_num_error; ret = ibnl_put_attr(skb, nlh, sizeof(u32), &mapping_num, IWPM_NLA_MAPINFO_SEND_NUM); if (ret) goto mapinfo_num_error; nlmsg_end(skb, nlh); ret = rdma_nl_unicast(&init_net, skb, iwpm_pid); if (ret) { skb = NULL; err_str = "Unable to send a nlmsg"; goto mapinfo_num_error; } pr_debug("%s: Sent mapping number = %u\n", __func__, mapping_num); return 0; mapinfo_num_error: pr_info("%s: %s\n", __func__, err_str); dev_kfree_skb(skb); return ret; } static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid) { struct nlmsghdr *nlh = NULL; int ret = 0; if (!skb) return ret; if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client, RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) { pr_warn("%s Unable to put NLMSG_DONE\n", __func__); dev_kfree_skb(skb); return -ENOMEM; } nlh->nlmsg_type = NLMSG_DONE; ret = rdma_nl_unicast(&init_net, skb, iwpm_pid); if (ret) pr_warn("%s Unable to send a nlmsg\n", __func__); return ret; } int iwpm_send_mapinfo(u8 nl_client, 
int iwpm_pid) { struct iwpm_mapping_info *map_info; struct sk_buff *skb = NULL; struct nlmsghdr *nlh; int skb_num = 0, mapping_num = 0; int i = 0, nlmsg_bytes = 0; unsigned long flags; const char *err_str = ""; int ret; skb = dev_alloc_skb(NLMSG_GOODSIZE); if (!skb) { ret = -ENOMEM; err_str = "Unable to allocate skb"; goto send_mapping_info_exit; } skb_num++; spin_lock_irqsave(&iwpm_mapinfo_lock, flags); ret = -EINVAL; for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { hlist_for_each_entry(map_info, &iwpm_hash_bucket[i], hlist_node) { if (map_info->nl_client != nl_client) continue; nlh = NULL; if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client, RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) { ret = -ENOMEM; err_str = "Unable to put the nlmsg header"; goto send_mapping_info_unlock; } err_str = "Unable to put attribute of the nlmsg"; ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), &map_info->local_sockaddr, IWPM_NLA_MAPINFO_LOCAL_ADDR); if (ret) goto send_mapping_info_unlock; ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), &map_info->mapped_sockaddr, IWPM_NLA_MAPINFO_MAPPED_ADDR); if (ret) goto send_mapping_info_unlock; if (iwpm_ulib_version > IWPM_UABI_VERSION_MIN) { ret = ibnl_put_attr(skb, nlh, sizeof(u32), &map_info->map_flags, IWPM_NLA_MAPINFO_FLAGS); if (ret) goto send_mapping_info_unlock; } nlmsg_end(skb, nlh); iwpm_print_sockaddr(&map_info->local_sockaddr, "send_mapping_info: Local sockaddr:"); iwpm_print_sockaddr(&map_info->mapped_sockaddr, "send_mapping_info: Mapped local sockaddr:"); mapping_num++; nlmsg_bytes += nlh->nlmsg_len; /* check if all mappings can fit in one skb */ if (NLMSG_GOODSIZE - nlmsg_bytes < nlh->nlmsg_len * 2) { /* and leave room for NLMSG_DONE */ nlmsg_bytes = 0; skb_num++; spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); /* send the skb */ ret = send_nlmsg_done(skb, nl_client, iwpm_pid); skb = NULL; if (ret) { err_str = "Unable to send map info"; goto send_mapping_info_exit; } if (skb_num == IWPM_MAPINFO_SKB_COUNT) { ret = -ENOMEM; err_str = "Insufficient skbs for map info"; goto send_mapping_info_exit; } skb = dev_alloc_skb(NLMSG_GOODSIZE); if (!skb) { ret = -ENOMEM; err_str = "Unable to allocate skb"; goto send_mapping_info_exit; } spin_lock_irqsave(&iwpm_mapinfo_lock, flags); } } } send_mapping_info_unlock: spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); send_mapping_info_exit: if (ret) { pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret); dev_kfree_skb(skb); return ret; } send_nlmsg_done(skb, nl_client, iwpm_pid); return send_mapinfo_num(mapping_num, nl_client, iwpm_pid); } int iwpm_mapinfo_available(void) { unsigned long flags; int full_bucket = 0, i = 0; spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { if (!hlist_empty(&iwpm_hash_bucket[i])) { full_bucket = 1; break; } } } spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); return full_bucket; } int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version) { struct sk_buff *skb = NULL; struct nlmsghdr *nlh; const char *err_str; int ret = -EINVAL; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_HELLO, &nlh, nl_client); if (!skb) { err_str = "Unable to create a nlmsg"; goto hello_num_error; } nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); err_str = "Unable to put attribute of abi_version into nlmsg"; ret = ibnl_put_attr(skb, nlh, sizeof(u16), &abi_version, IWPM_NLA_HELLO_ABI_VERSION); if (ret) goto hello_num_error; nlmsg_end(skb, nlh); ret = rdma_nl_unicast(&init_net, skb, iwpm_pid); if (ret) { skb = NULL; err_str = "Unable to send a 
nlmsg"; goto hello_num_error; } pr_debug("%s: Sent hello abi_version = %u\n", __func__, abi_version); return 0; hello_num_error: pr_info("%s: %s\n", __func__, err_str); dev_kfree_skb(skb); return ret; } |
| 9 1 1 1 1 8 9 10 3 2 10 3 3 3 142 141 142 142 142 142 142 141 141 103 104 102 104 104 104 103 103 104 102 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 | // SPDX-License-Identifier: GPL-2.0-only /* * IPv6 library code, needed by static components when full IPv6 support is * not configured or static. These functions are needed by GSO/GRO implementation. */ #include <linux/export.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/addrconf.h> #include <net/secure_seq.h> #include <linux/netfilter.h> static u32 __ipv6_select_ident(struct net *net, const struct in6_addr *dst, const struct in6_addr *src) { return get_random_u32_above(0); } /* This function exists only for tap drivers that must support broken * clients requesting UFO without specifying an IPv6 fragment ID. * * This is similar to ipv6_select_ident() but we use an independent hash * seed to limit information leakage. * * The network header must be set before calling this. */ __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) { struct in6_addr buf[2]; struct in6_addr *addrs; u32 id; addrs = skb_header_pointer(skb, skb_network_offset(skb) + offsetof(struct ipv6hdr, saddr), sizeof(buf), buf); if (!addrs) return 0; id = __ipv6_select_ident(net, &addrs[1], &addrs[0]); return htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr) { u32 id; id = __ipv6_select_ident(net, daddr, saddr); return htonl(id); } EXPORT_SYMBOL(ipv6_select_ident); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { unsigned int offset = sizeof(struct ipv6hdr); unsigned int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; while (offset <= packet_len) { struct ipv6_opt_hdr *exthdr; switch (**nexthdr) { case NEXTHDR_HOP: break; case NEXTHDR_ROUTING: found_rhdr = 1; break; case NEXTHDR_DEST: #if IS_ENABLED(CONFIG_IPV6_MIP6) if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) break; #endif if (found_rhdr) return offset; break; default: return offset; } if (offset + sizeof(struct ipv6_opt_hdr) > packet_len) return -EINVAL; exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); offset += ipv6_optlen(exthdr); if (offset > IPV6_MAXPLEN) return -EINVAL; *nexthdr = &exthdr->nexthdr; } return -EINVAL; } EXPORT_SYMBOL(ip6_find_1stfragopt); #if IS_ENABLED(CONFIG_IPV6) int ip6_dst_hoplimit(struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); if (hoplimit == 0) { struct net_device *dev = dst->dev; struct inet6_dev *idev; rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) hoplimit = READ_ONCE(idev->cnf.hop_limit); else hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit); rcu_read_unlock(); } return hoplimit; } EXPORT_SYMBOL(ip6_dst_hoplimit); #endif int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int len; len = skb->len - sizeof(struct ipv6hdr); if (len > IPV6_MAXPLEN) len = 0; 
ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing */ skb = l3mdev_ip6_out(sk, skb); if (unlikely(!skb)) return 0; skb->protocol = htons(ETH_P_IPV6); return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); } EXPORT_SYMBOL_GPL(__ip6_local_out); int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; err = __ip6_local_out(net, sk, skb); if (likely(err == 1)) err = dst_output(net, sk, skb); return err; } EXPORT_SYMBOL_GPL(ip6_local_out); |
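/*
 * Illustrative sketch (not kernel code): __ip6_local_out() above derives the
 * IPv6 payload_len field as the total length minus the fixed 40-byte header,
 * and writes 0 when the result would not fit in 16 bits (oversized packets
 * carry their real length in a jumbo payload option instead).
 * demo_payload_len() is a hypothetical helper and returns the value in host
 * byte order; the kernel stores it with htons().
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_IPV6_HDRLEN 40
#define DEMO_IPV6_MAXPLEN 65535

static uint16_t demo_payload_len(size_t total_len)
{
	size_t len = total_len - DEMO_IPV6_HDRLEN;

	if (len > DEMO_IPV6_MAXPLEN)
		len = 0;
	return (uint16_t)len;
}

int main(void)
{
	printf("1280-byte packet  -> payload_len %u\n", demo_payload_len(1280));
	printf("80000-byte packet -> payload_len %u\n", demo_payload_len(80000));
	return 0;
}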
/* SPDX-License-Identifier: GPL-2.0-only */ /* * AppArmor security module * * This file contains AppArmor policy definitions. * * Copyright (C) 1998-2008 Novell/SUSE * Copyright 2009-2017 Canonical Ltd. */ #ifndef __AA_NAMESPACE_H #define __AA_NAMESPACE_H #include <linux/kref.h> #include "apparmor.h" #include "apparmorfs.h" #include "label.h" #include "policy.h" /* struct aa_ns_acct - accounting of profiles in namespace * @max_size: maximum space allowed for all profiles in namespace * @max_count: maximum number of profiles that can be in this namespace * @size: current size of profiles * @count: current count of profiles (includes null profiles) */ struct aa_ns_acct { int max_size; int max_count; int size; int count; }; /* struct aa_ns - namespace for a set of profiles * @base: common policy * @parent: parent of namespace * @lock: lock for modifying the object * @acct: accounting for the namespace * @unconfined: special unconfined profile for the namespace * @sub_ns: list of namespaces under the current namespace. * @uniq_null: uniq value used for null learning profiles * @uniq_id: a unique id count for the profiles in the namespace * @level: level of ns within the tree hierarchy * @dents: dentries for the namespaces file entries in apparmorfs * * An aa_ns defines the set profiles that are searched to determine which * profile to attach to a task. Profiles can not be shared between aa_ns * and profile names within a namespace are guaranteed to be unique. When * profiles in separate namespaces have the same name they are NOT considered * to be equivalent. * * Namespaces are hierarchical and only namespaces and profiles below the * current namespace are visible.
* * Namespace names must be unique and can not contain the characters :/\0 */ struct aa_ns { struct aa_policy base; struct aa_ns *parent; struct mutex lock; struct aa_ns_acct acct; struct aa_profile *unconfined; struct list_head sub_ns; atomic_t uniq_null; long uniq_id; int level; long revision; wait_queue_head_t wait; struct aa_labelset labels; struct list_head rawdata_list; struct dentry *dents[AAFS_NS_SIZEOF]; }; extern struct aa_label *kernel_t; extern struct aa_ns *root_ns; extern const char *aa_hidden_ns_name; #define ns_unconfined(NS) (&(NS)->unconfined->label) bool aa_ns_visible(struct aa_ns *curr, struct aa_ns *view, bool subns); const char *aa_ns_name(struct aa_ns *parent, struct aa_ns *child, bool subns); void aa_free_ns(struct aa_ns *ns); int aa_alloc_root_ns(void); void aa_free_root_ns(void); struct aa_ns *__aa_lookupn_ns(struct aa_ns *view, const char *hname, size_t n); struct aa_ns *aa_lookupn_ns(struct aa_ns *view, const char *name, size_t n); struct aa_ns *__aa_find_or_create_ns(struct aa_ns *parent, const char *name, struct dentry *dir); struct aa_ns *aa_prepare_ns(struct aa_ns *root, const char *name); void __aa_remove_ns(struct aa_ns *ns); static inline struct aa_profile *aa_deref_parent(struct aa_profile *p) { return rcu_dereference_protected(p->parent, mutex_is_locked(&p->ns->lock)); } /** * aa_get_ns - increment references count on @ns * @ns: namespace to increment reference count of (MAYBE NULL) * * Returns: pointer to @ns, if @ns is NULL returns NULL * Requires: @ns must be held with valid refcount when called */ static inline struct aa_ns *aa_get_ns(struct aa_ns *ns) { if (ns) aa_get_profile(ns->unconfined); return ns; } /** * aa_put_ns - decrement refcount on @ns * @ns: namespace to put reference of * * Decrement reference count of @ns and if no longer in use free it */ static inline void aa_put_ns(struct aa_ns *ns) { if (ns) aa_put_profile(ns->unconfined); } /** * __aa_findn_ns - find a namespace on a list by @name * @head: list to search for namespace on (NOT NULL) * @name: name of namespace to look for (NOT NULL) * @n: length of @name * Returns: unrefcounted namespace * * Requires: rcu_read_lock be held */ static inline struct aa_ns *__aa_findn_ns(struct list_head *head, const char *name, size_t n) { return (struct aa_ns *)__policy_strn_find(head, name, n); } static inline struct aa_ns *__aa_find_ns(struct list_head *head, const char *name) { return __aa_findn_ns(head, name, strlen(name)); } #endif /* AA_NAMESPACE_H */ |
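/*
 * Illustrative sketch (not kernel code): aa_get_ns()/aa_put_ns() above keep
 * no counter of their own - a namespace's lifetime rides on the reference
 * count of its unconfined profile. A toy user-space analogue of that pattern;
 * the demo_* types and the printout standing in for freeing are hypothetical.
 */
#include <stdio.h>

struct demo_profile {
	int ref;
};

struct demo_ns {
	struct demo_profile *unconfined;
};

static struct demo_ns *demo_get_ns(struct demo_ns *ns)
{
	if (ns)
		ns->unconfined->ref++;	/* mirrors aa_get_profile(ns->unconfined) */
	return ns;
}

static void demo_put_ns(struct demo_ns *ns)
{
	if (ns && --ns->unconfined->ref == 0)
		printf("last reference dropped, namespace would be freed\n");
}

int main(void)
{
	struct demo_profile unconfined = { .ref = 1 };
	struct demo_ns ns = { .unconfined = &unconfined };

	demo_get_ns(&ns);	/* caller takes a reference */
	demo_put_ns(&ns);	/* caller drops it */
	demo_put_ns(&ns);	/* initial reference gone -> would be freed */
	return 0;
}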
// SPDX-License-Identifier: GPL-2.0-or-later /* X.509 certificate parser * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) "X.509: "fmt #include <linux/kernel.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/oid_registry.h> #include <crypto/public_key.h> #include "x509_parser.h" #include "x509.asn1.h" #include "x509_akid.asn1.h" struct x509_parse_context { struct x509_certificate *cert; /* Certificate being constructed */ unsigned long data; /* Start of data */ const void *key; /* Key data */ size_t key_size; /* Size of key data */ const void *params; /* Key parameters */ size_t params_size; /* Size of key parameters */ enum OID key_algo; /* Algorithm used by the cert's key */ enum OID last_oid; /* Last OID encountered */ enum OID sig_algo; /* Algorithm used to sign the cert */ u8 o_size; /* Size of organizationName (O) */ u8 cn_size; /* Size of commonName (CN) */ u8 email_size; /* Size of emailAddress */ u16 o_offset; /* Offset of organizationName (O) */ u16 cn_offset; /* Offset of commonName (CN) */ u16 email_offset; /* Offset of emailAddress */ unsigned raw_akid_size; const void *raw_akid; /* Raw authorityKeyId in ASN.1 */ const void *akid_raw_issuer; /* Raw directoryName in authorityKeyId */ unsigned akid_raw_issuer_size; }; /* * Free an X.509 certificate */ void x509_free_certificate(struct x509_certificate *cert) { if (cert) { public_key_free(cert->pub); public_key_signature_free(cert->sig); kfree(cert->issuer); kfree(cert->subject); kfree(cert->id); kfree(cert->skid); kfree(cert); } } EXPORT_SYMBOL_GPL(x509_free_certificate); /* * Parse an X.509 certificate */ struct x509_certificate *x509_cert_parse(const void *data, size_t datalen) { struct x509_certificate *cert __free(x509_free_certificate); struct x509_parse_context *ctx __free(kfree) = NULL; struct asymmetric_key_id *kid; long ret; cert = kzalloc(sizeof(struct x509_certificate), GFP_KERNEL); if (!cert) return ERR_PTR(-ENOMEM); cert->pub = kzalloc(sizeof(struct public_key), GFP_KERNEL); if (!cert->pub) return ERR_PTR(-ENOMEM); cert->sig = kzalloc(sizeof(struct public_key_signature), GFP_KERNEL); if (!cert->sig) return ERR_PTR(-ENOMEM); ctx = kzalloc(sizeof(struct x509_parse_context), GFP_KERNEL); if (!ctx) return ERR_PTR(-ENOMEM); ctx->cert = cert; ctx->data = (unsigned long)data; /* Attempt to decode the certificate */ ret = asn1_ber_decoder(&x509_decoder, ctx, data, datalen); if (ret < 0) return ERR_PTR(ret); /* Decode the AuthorityKeyIdentifier */ if (ctx->raw_akid) { pr_devel("AKID: %u %*phN\n", ctx->raw_akid_size, ctx->raw_akid_size, ctx->raw_akid); ret = asn1_ber_decoder(&x509_akid_decoder, ctx, ctx->raw_akid, ctx->raw_akid_size); if (ret < 0) { pr_warn("Couldn't decode AuthKeyIdentifier\n"); return ERR_PTR(ret); } } cert->pub->key = kmemdup(ctx->key, ctx->key_size, GFP_KERNEL); if (!cert->pub->key) return ERR_PTR(-ENOMEM); cert->pub->keylen = ctx->key_size; cert->pub->params = kmemdup(ctx->params, ctx->params_size, GFP_KERNEL); if (!cert->pub->params) return ERR_PTR(-ENOMEM); cert->pub->paramlen = ctx->params_size; cert->pub->algo = ctx->key_algo; /* Grab the signature bits */ ret = x509_get_sig_params(cert); if (ret < 0) return ERR_PTR(ret); /* Generate cert issuer + serial number key ID */ kid = asymmetric_key_generate_id(cert->raw_serial, cert->raw_serial_size, cert->raw_issuer, cert->raw_issuer_size); if (IS_ERR(kid)) return ERR_CAST(kid); cert->id = kid; /* Detect self-signed certificates */ ret = x509_check_for_self_signed(cert); if (ret < 0) return ERR_PTR(ret); return_ptr(cert); } EXPORT_SYMBOL_GPL(x509_cert_parse); /* * 
Note an OID when we find one for later processing when we know how * to interpret it. */ int x509_note_OID(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; ctx->last_oid = look_up_OID(value, vlen); if (ctx->last_oid == OID__NR) { char buffer[50]; sprint_oid(value, vlen, buffer, sizeof(buffer)); pr_debug("Unknown OID: [%lu] %s\n", (unsigned long)value - ctx->data, buffer); } return 0; } /* * Save the position of the TBS data so that we can check the signature over it * later. */ int x509_note_tbs_certificate(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; pr_debug("x509_note_tbs_certificate(,%zu,%02x,%ld,%zu)!\n", hdrlen, tag, (unsigned long)value - ctx->data, vlen); ctx->cert->tbs = value - hdrlen; ctx->cert->tbs_size = vlen + hdrlen; return 0; } /* * Record the algorithm that was used to sign this certificate. */ int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; pr_debug("PubKey Algo: %u\n", ctx->last_oid); switch (ctx->last_oid) { default: return -ENOPKG; /* Unsupported combination */ case OID_sha1WithRSAEncryption: ctx->cert->sig->hash_algo = "sha1"; goto rsa_pkcs1; case OID_sha256WithRSAEncryption: ctx->cert->sig->hash_algo = "sha256"; goto rsa_pkcs1; case OID_sha384WithRSAEncryption: ctx->cert->sig->hash_algo = "sha384"; goto rsa_pkcs1; case OID_sha512WithRSAEncryption: ctx->cert->sig->hash_algo = "sha512"; goto rsa_pkcs1; case OID_sha224WithRSAEncryption: ctx->cert->sig->hash_algo = "sha224"; goto rsa_pkcs1; case OID_id_ecdsa_with_sha1: ctx->cert->sig->hash_algo = "sha1"; goto ecdsa; case OID_id_rsassa_pkcs1_v1_5_with_sha3_256: ctx->cert->sig->hash_algo = "sha3-256"; goto rsa_pkcs1; case OID_id_rsassa_pkcs1_v1_5_with_sha3_384: ctx->cert->sig->hash_algo = "sha3-384"; goto rsa_pkcs1; case OID_id_rsassa_pkcs1_v1_5_with_sha3_512: ctx->cert->sig->hash_algo = "sha3-512"; goto rsa_pkcs1; case OID_id_ecdsa_with_sha224: ctx->cert->sig->hash_algo = "sha224"; goto ecdsa; case OID_id_ecdsa_with_sha256: ctx->cert->sig->hash_algo = "sha256"; goto ecdsa; case OID_id_ecdsa_with_sha384: ctx->cert->sig->hash_algo = "sha384"; goto ecdsa; case OID_id_ecdsa_with_sha512: ctx->cert->sig->hash_algo = "sha512"; goto ecdsa; case OID_id_ecdsa_with_sha3_256: ctx->cert->sig->hash_algo = "sha3-256"; goto ecdsa; case OID_id_ecdsa_with_sha3_384: ctx->cert->sig->hash_algo = "sha3-384"; goto ecdsa; case OID_id_ecdsa_with_sha3_512: ctx->cert->sig->hash_algo = "sha3-512"; goto ecdsa; case OID_gost2012Signature256: ctx->cert->sig->hash_algo = "streebog256"; goto ecrdsa; case OID_gost2012Signature512: ctx->cert->sig->hash_algo = "streebog512"; goto ecrdsa; } rsa_pkcs1: ctx->cert->sig->pkey_algo = "rsa"; ctx->cert->sig->encoding = "pkcs1"; ctx->sig_algo = ctx->last_oid; return 0; ecrdsa: ctx->cert->sig->pkey_algo = "ecrdsa"; ctx->cert->sig->encoding = "raw"; ctx->sig_algo = ctx->last_oid; return 0; ecdsa: ctx->cert->sig->pkey_algo = "ecdsa"; ctx->cert->sig->encoding = "x962"; ctx->sig_algo = ctx->last_oid; return 0; } /* * Note the whereabouts and type of the signature. 
*/ int x509_note_signature(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; pr_debug("Signature: alg=%u, size=%zu\n", ctx->last_oid, vlen); /* * In X.509 certificates, the signature's algorithm is stored in two * places: inside the TBSCertificate (the data that is signed), and * alongside the signature. These *must* match. */ if (ctx->last_oid != ctx->sig_algo) { pr_warn("signatureAlgorithm (%u) differs from tbsCertificate.signature (%u)\n", ctx->last_oid, ctx->sig_algo); return -EINVAL; } if (strcmp(ctx->cert->sig->pkey_algo, "rsa") == 0 || strcmp(ctx->cert->sig->pkey_algo, "ecrdsa") == 0 || strcmp(ctx->cert->sig->pkey_algo, "ecdsa") == 0) { /* Discard the BIT STRING metadata */ if (vlen < 1 || *(const u8 *)value != 0) return -EBADMSG; value++; vlen--; } ctx->cert->raw_sig = value; ctx->cert->raw_sig_size = vlen; return 0; } /* * Note the certificate serial number */ int x509_note_serial(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; ctx->cert->raw_serial = value; ctx->cert->raw_serial_size = vlen; return 0; } /* * Note some of the name segments from which we'll fabricate a name. */ int x509_extract_name_segment(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; switch (ctx->last_oid) { case OID_commonName: ctx->cn_size = vlen; ctx->cn_offset = (unsigned long)value - ctx->data; break; case OID_organizationName: ctx->o_size = vlen; ctx->o_offset = (unsigned long)value - ctx->data; break; case OID_email_address: ctx->email_size = vlen; ctx->email_offset = (unsigned long)value - ctx->data; break; default: break; } return 0; } /* * Fabricate and save the issuer and subject names */ static int x509_fabricate_name(struct x509_parse_context *ctx, size_t hdrlen, unsigned char tag, char **_name, size_t vlen) { const void *name, *data = (const void *)ctx->data; size_t namesize; char *buffer; if (*_name) return -EINVAL; /* Empty name string if no material */ if (!ctx->cn_size && !ctx->o_size && !ctx->email_size) { buffer = kzalloc(1, GFP_KERNEL); if (!buffer) return -ENOMEM; goto done; } if (ctx->cn_size && ctx->o_size) { /* Consider combining O and CN, but use only the CN if it is * prefixed by the O, or a significant portion thereof. 
*/ namesize = ctx->cn_size; name = data + ctx->cn_offset; if (ctx->cn_size >= ctx->o_size && memcmp(data + ctx->cn_offset, data + ctx->o_offset, ctx->o_size) == 0) goto single_component; if (ctx->cn_size >= 7 && ctx->o_size >= 7 && memcmp(data + ctx->cn_offset, data + ctx->o_offset, 7) == 0) goto single_component; buffer = kmalloc(ctx->o_size + 2 + ctx->cn_size + 1, GFP_KERNEL); if (!buffer) return -ENOMEM; memcpy(buffer, data + ctx->o_offset, ctx->o_size); buffer[ctx->o_size + 0] = ':'; buffer[ctx->o_size + 1] = ' '; memcpy(buffer + ctx->o_size + 2, data + ctx->cn_offset, ctx->cn_size); buffer[ctx->o_size + 2 + ctx->cn_size] = 0; goto done; } else if (ctx->cn_size) { namesize = ctx->cn_size; name = data + ctx->cn_offset; } else if (ctx->o_size) { namesize = ctx->o_size; name = data + ctx->o_offset; } else { namesize = ctx->email_size; name = data + ctx->email_offset; } single_component: buffer = kmalloc(namesize + 1, GFP_KERNEL); if (!buffer) return -ENOMEM; memcpy(buffer, name, namesize); buffer[namesize] = 0; done: *_name = buffer; ctx->cn_size = 0; ctx->o_size = 0; ctx->email_size = 0; return 0; } int x509_note_issuer(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; struct asymmetric_key_id *kid; ctx->cert->raw_issuer = value; ctx->cert->raw_issuer_size = vlen; if (!ctx->cert->sig->auth_ids[2]) { kid = asymmetric_key_generate_id(value, vlen, "", 0); if (IS_ERR(kid)) return PTR_ERR(kid); ctx->cert->sig->auth_ids[2] = kid; } return x509_fabricate_name(ctx, hdrlen, tag, &ctx->cert->issuer, vlen); } int x509_note_subject(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; ctx->cert->raw_subject = value; ctx->cert->raw_subject_size = vlen; return x509_fabricate_name(ctx, hdrlen, tag, &ctx->cert->subject, vlen); } /* * Extract the parameters for the public key */ int x509_note_params(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; /* * AlgorithmIdentifier is used three times in the x509, we should skip * first and ignore third, using second one which is after subject and * before subjectPublicKey. 
*/ if (!ctx->cert->raw_subject || ctx->key) return 0; ctx->params = value - hdrlen; ctx->params_size = vlen + hdrlen; return 0; } /* * Extract the data for the public key algorithm */ int x509_extract_key_data(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; enum OID oid; ctx->key_algo = ctx->last_oid; switch (ctx->last_oid) { case OID_rsaEncryption: ctx->cert->pub->pkey_algo = "rsa"; break; case OID_gost2012PKey256: case OID_gost2012PKey512: ctx->cert->pub->pkey_algo = "ecrdsa"; break; case OID_id_ecPublicKey: if (parse_OID(ctx->params, ctx->params_size, &oid) != 0) return -EBADMSG; switch (oid) { case OID_id_prime192v1: ctx->cert->pub->pkey_algo = "ecdsa-nist-p192"; break; case OID_id_prime256v1: ctx->cert->pub->pkey_algo = "ecdsa-nist-p256"; break; case OID_id_ansip384r1: ctx->cert->pub->pkey_algo = "ecdsa-nist-p384"; break; case OID_id_ansip521r1: ctx->cert->pub->pkey_algo = "ecdsa-nist-p521"; break; default: return -ENOPKG; } break; default: return -ENOPKG; } /* Discard the BIT STRING metadata */ if (vlen < 1 || *(const u8 *)value != 0) return -EBADMSG; ctx->key = value + 1; ctx->key_size = vlen - 1; return 0; } /* The keyIdentifier in AuthorityKeyIdentifier SEQUENCE is tag(CONT,PRIM,0) */ #define SEQ_TAG_KEYID (ASN1_CONT << 6) /* * Process certificate extensions that are used to qualify the certificate. */ int x509_process_extension(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; struct asymmetric_key_id *kid; const unsigned char *v = value; pr_debug("Extension: %u\n", ctx->last_oid); if (ctx->last_oid == OID_subjectKeyIdentifier) { /* Get hold of the key fingerprint */ if (ctx->cert->skid || vlen < 3) return -EBADMSG; if (v[0] != ASN1_OTS || v[1] != vlen - 2) return -EBADMSG; v += 2; vlen -= 2; ctx->cert->raw_skid_size = vlen; ctx->cert->raw_skid = v; kid = asymmetric_key_generate_id(v, vlen, "", 0); if (IS_ERR(kid)) return PTR_ERR(kid); ctx->cert->skid = kid; pr_debug("subjkeyid %*phN\n", kid->len, kid->data); return 0; } if (ctx->last_oid == OID_keyUsage) { /* * Get hold of the keyUsage bit string * v[1] is the encoding size * (Expect either 0x02 or 0x03, making it 1 or 2 bytes) * v[2] is the number of unused bits in the bit string * (If >= 3 keyCertSign is missing when v[1] = 0x02) * v[3] and possibly v[4] contain the bit string * * From RFC 5280 4.2.1.3: * 0x04 is where keyCertSign lands in this bit string * 0x80 is where digitalSignature lands in this bit string */ if (v[0] != ASN1_BTS) return -EBADMSG; if (vlen < 4) return -EBADMSG; if (v[2] >= 8) return -EBADMSG; if (v[3] & 0x80) ctx->cert->pub->key_eflags |= 1 << KEY_EFLAG_DIGITALSIG; if (v[1] == 0x02 && v[2] <= 2 && (v[3] & 0x04)) ctx->cert->pub->key_eflags |= 1 << KEY_EFLAG_KEYCERTSIGN; else if (vlen > 4 && v[1] == 0x03 && (v[3] & 0x04)) ctx->cert->pub->key_eflags |= 1 << KEY_EFLAG_KEYCERTSIGN; return 0; } if (ctx->last_oid == OID_authorityKeyIdentifier) { /* Get hold of the CA key fingerprint */ ctx->raw_akid = v; ctx->raw_akid_size = vlen; return 0; } if (ctx->last_oid == OID_basicConstraints) { /* * Get hold of the basicConstraints * v[1] is the encoding size * (Expect 0x2 or greater, making it 1 or more bytes) * v[2] is the encoding type * (Expect an ASN1_BOOL for the CA) * v[3] is the contents of the ASN1_BOOL * (Expect 1 if the CA is TRUE) * vlen should match the entire extension size */ if (v[0] != (ASN1_CONS_BIT | ASN1_SEQ)) return -EBADMSG; if (vlen < 2) return 
-EBADMSG; if (v[1] != vlen - 2) return -EBADMSG; if (vlen >= 4 && v[1] != 0 && v[2] == ASN1_BOOL && v[3] == 1) ctx->cert->pub->key_eflags |= 1 << KEY_EFLAG_CA; return 0; } return 0; } /** * x509_decode_time - Decode an X.509 time ASN.1 object * @_t: The time to fill in * @hdrlen: The length of the object header * @tag: The object tag * @value: The object value * @vlen: The size of the object value * * Decode an ASN.1 universal time or generalised time field into a struct the * kernel can handle and check it for validity. The time is decoded thus: * * [RFC5280 §4.1.2.5] * CAs conforming to this profile MUST always encode certificate validity * dates through the year 2049 as UTCTime; certificate validity dates in * 2050 or later MUST be encoded as GeneralizedTime. Conforming * applications MUST be able to process validity dates that are encoded in * either UTCTime or GeneralizedTime. */ int x509_decode_time(time64_t *_t, size_t hdrlen, unsigned char tag, const unsigned char *value, size_t vlen) { static const unsigned char month_lengths[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; const unsigned char *p = value; unsigned year, mon, day, hour, min, sec, mon_len; #define dec2bin(X) ({ unsigned char x = (X) - '0'; if (x > 9) goto invalid_time; x; }) #define DD2bin(P) ({ unsigned x = dec2bin(P[0]) * 10 + dec2bin(P[1]); P += 2; x; }) if (tag == ASN1_UNITIM) { /* UTCTime: YYMMDDHHMMSSZ */ if (vlen != 13) goto unsupported_time; year = DD2bin(p); if (year >= 50) year += 1900; else year += 2000; } else if (tag == ASN1_GENTIM) { /* GenTime: YYYYMMDDHHMMSSZ */ if (vlen != 15) goto unsupported_time; year = DD2bin(p) * 100 + DD2bin(p); if (year >= 1950 && year <= 2049) goto invalid_time; } else { goto unsupported_time; } mon = DD2bin(p); day = DD2bin(p); hour = DD2bin(p); min = DD2bin(p); sec = DD2bin(p); if (*p != 'Z') goto unsupported_time; if (year < 1970 || mon < 1 || mon > 12) goto invalid_time; mon_len = month_lengths[mon - 1]; if (mon == 2) { if (year % 4 == 0) { mon_len = 29; if (year % 100 == 0) { mon_len = 28; if (year % 400 == 0) mon_len = 29; } } } if (day < 1 || day > mon_len || hour > 24 || /* ISO 8601 permits 24:00:00 as midnight tomorrow */ min > 59 || sec > 60) /* ISO 8601 permits leap seconds [X.680 46.3] */ goto invalid_time; *_t = mktime64(year, mon, day, hour, min, sec); return 0; unsupported_time: pr_debug("Got unsupported time [tag %02x]: '%*phN'\n", tag, (int)vlen, value); return -EBADMSG; invalid_time: pr_debug("Got invalid time [tag %02x]: '%*phN'\n", tag, (int)vlen, value); return -EBADMSG; } EXPORT_SYMBOL_GPL(x509_decode_time); int x509_note_not_before(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; return x509_decode_time(&ctx->cert->valid_from, hdrlen, tag, value, vlen); } int x509_note_not_after(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; return x509_decode_time(&ctx->cert->valid_to, hdrlen, tag, value, vlen); } /* * Note a key identifier-based AuthorityKeyIdentifier */ int x509_akid_note_kid(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; struct asymmetric_key_id *kid; pr_debug("AKID: keyid: %*phN\n", (int)vlen, value); if (ctx->cert->sig->auth_ids[1]) return 0; kid = asymmetric_key_generate_id(value, vlen, "", 0); if (IS_ERR(kid)) return PTR_ERR(kid); pr_debug("authkeyid %*phN\n", kid->len, kid->data); ctx->cert->sig->auth_ids[1] 
= kid; return 0; } /* * Note a directoryName in an AuthorityKeyIdentifier */ int x509_akid_note_name(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; pr_debug("AKID: name: %*phN\n", (int)vlen, value); ctx->akid_raw_issuer = value; ctx->akid_raw_issuer_size = vlen; return 0; } /* * Note a serial number in an AuthorityKeyIdentifier */ int x509_akid_note_serial(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct x509_parse_context *ctx = context; struct asymmetric_key_id *kid; pr_debug("AKID: serial: %*phN\n", (int)vlen, value); if (!ctx->akid_raw_issuer || ctx->cert->sig->auth_ids[0]) return 0; kid = asymmetric_key_generate_id(value, vlen, ctx->akid_raw_issuer, ctx->akid_raw_issuer_size); if (IS_ERR(kid)) return PTR_ERR(kid); pr_debug("authkeyid %*phN\n", kid->len, kid->data); ctx->cert->sig->auth_ids[0] = kid; return 0; } |
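/*
 * Illustrative sketch (not kernel code): the UTCTime century rule applied by
 * x509_decode_time() above - RFC 5280 folds a two-digit year into the
 * 1950-2049 window (>= 50 means 19xx, otherwise 20xx), and dates inside that
 * window must not be encoded as GeneralizedTime. demo_utctime_year() is a
 * hypothetical helper.
 */
#include <stdio.h>

static unsigned int demo_utctime_year(unsigned int two_digit_year)
{
	return two_digit_year >= 50 ? 1900 + two_digit_year
				    : 2000 + two_digit_year;
}

int main(void)
{
	/* "490101000000Z" -> 2049, "500101000000Z" -> 1950 */
	printf("YY=49 -> %u\n", demo_utctime_year(49));
	printf("YY=50 -> %u\n", demo_utctime_year(50));
	printf("YY=99 -> %u\n", demo_utctime_year(99));
	printf("YY=24 -> %u\n", demo_utctime_year(24));
	return 0;
}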
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2009 Sunplus Core Technology Co., Ltd. * Lennox Wu <lennox.wu@sunplusct.com> * Chen Liqin <liqin.chen@sunplusct.com> * Copyright (C) 2012 Regents of the University of California */ #include <linux/mm.h> #include <linux/kernel.h> #include <linux/interrupt.h> #include <linux/perf_event.h> #include <linux/signal.h> #include <linux/uaccess.h> #include <linux/kprobes.h> #include <linux/kfence.h> #include <linux/entry-common.h> #include <asm/ptrace.h> #include <asm/tlbflush.h> #include "../kernel/head.h" static void show_pte(unsigned long addr) { pgd_t *pgdp, pgd; p4d_t *p4dp, p4d; pud_t *pudp, pud; pmd_t *pmdp, pmd; pte_t *ptep, pte; struct mm_struct *mm = current->mm; if (!mm) mm = &init_mm; pr_alert("Current %s pgtable: %luK pagesize, %d-bit VAs, pgdp=0x%016llx\n", current->comm, PAGE_SIZE / SZ_1K, VA_BITS, mm == &init_mm ?
(u64)__pa_symbol(mm->pgd) : virt_to_phys(mm->pgd)); pgdp = pgd_offset(mm, addr); pgd = pgdp_get(pgdp); pr_alert("[%016lx] pgd=%016lx", addr, pgd_val(pgd)); if (pgd_none(pgd) || pgd_bad(pgd) || pgd_leaf(pgd)) goto out; p4dp = p4d_offset(pgdp, addr); p4d = p4dp_get(p4dp); pr_cont(", p4d=%016lx", p4d_val(p4d)); if (p4d_none(p4d) || p4d_bad(p4d) || p4d_leaf(p4d)) goto out; pudp = pud_offset(p4dp, addr); pud = pudp_get(pudp); pr_cont(", pud=%016lx", pud_val(pud)); if (pud_none(pud) || pud_bad(pud) || pud_leaf(pud)) goto out; pmdp = pmd_offset(pudp, addr); pmd = pmdp_get(pmdp); pr_cont(", pmd=%016lx", pmd_val(pmd)); if (pmd_none(pmd) || pmd_bad(pmd) || pmd_leaf(pmd)) goto out; ptep = pte_offset_map(pmdp, addr); if (!ptep) goto out; pte = ptep_get(ptep); pr_cont(", pte=%016lx", pte_val(pte)); pte_unmap(ptep); out: pr_cont("\n"); } static void die_kernel_fault(const char *msg, unsigned long addr, struct pt_regs *regs) { bust_spinlocks(1); pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg, addr); bust_spinlocks(0); show_pte(addr); die(regs, "Oops"); make_task_dead(SIGKILL); } static inline void no_context(struct pt_regs *regs, unsigned long addr) { const char *msg; /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) return; /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ if (addr < PAGE_SIZE) msg = "NULL pointer dereference"; else { if (kfence_handle_page_fault(addr, regs->cause == EXC_STORE_PAGE_FAULT, regs)) return; msg = "paging request"; } die_kernel_fault(msg, addr, regs); } static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) { if (!user_mode(regs)) { no_context(regs, addr); return; } if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). */ pagefault_out_of_memory(); return; } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { /* Kernel mode? Handle exceptions or die */ do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; } else if (fault & VM_FAULT_SIGSEGV) { do_trap(regs, SIGSEGV, SEGV_MAPERR, addr); return; } BUG(); } static inline void bad_area_nosemaphore(struct pt_regs *regs, int code, unsigned long addr) { /* * Something tried to access memory that isn't in our memory map. * Fix it, but check if it's kernel or user first. */ /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { do_trap(regs, SIGSEGV, code, addr); return; } no_context(regs, addr); } static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr) { mmap_read_unlock(mm); bad_area_nosemaphore(regs, code, addr); } static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr) { pgd_t *pgd, *pgd_k; pud_t *pud_k; p4d_t *p4d_k; pmd_t *pmd_k; pte_t *pte_k; int index; unsigned long pfn; /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) return do_trap(regs, SIGSEGV, code, addr); /* * Synchronize this task's top level page-table * with the 'reference' page table. * * Do _not_ use "tsk->active_mm->pgd" here. * We might be inside an interrupt in the middle * of a task switch. 
*/ index = pgd_index(addr); pfn = csr_read(CSR_SATP) & SATP_PPN; pgd = (pgd_t *)pfn_to_virt(pfn) + index; pgd_k = init_mm.pgd + index; if (!pgd_present(pgdp_get(pgd_k))) { no_context(regs, addr); return; } set_pgd(pgd, pgdp_get(pgd_k)); p4d_k = p4d_offset(pgd_k, addr); if (!p4d_present(p4dp_get(p4d_k))) { no_context(regs, addr); return; } pud_k = pud_offset(p4d_k, addr); if (!pud_present(pudp_get(pud_k))) { no_context(regs, addr); return; } if (pud_leaf(pudp_get(pud_k))) goto flush_tlb; /* * Since the vmalloc area is global, it is unnecessary * to copy individual PTEs */ pmd_k = pmd_offset(pud_k, addr); if (!pmd_present(pmdp_get(pmd_k))) { no_context(regs, addr); return; } if (pmd_leaf(pmdp_get(pmd_k))) goto flush_tlb; /* * Make sure the actual PTE exists as well to * catch kernel vmalloc-area accesses to non-mapped * addresses. If we don't do this, this will just * silently loop forever. */ pte_k = pte_offset_kernel(pmd_k, addr); if (!pte_present(ptep_get(pte_k))) { no_context(regs, addr); return; } /* * The kernel assumes that TLBs don't cache invalid * entries, but in RISC-V, SFENCE.VMA specifies an * ordering constraint, not a cache flush; it is * necessary even after writing invalid entries. */ flush_tlb: local_flush_tlb_page(addr); } static inline bool access_error(unsigned long cause, struct vm_area_struct *vma) { switch (cause) { case EXC_INST_PAGE_FAULT: if (!(vma->vm_flags & VM_EXEC)) { return true; } break; case EXC_LOAD_PAGE_FAULT: /* Write implies read */ if (!(vma->vm_flags & (VM_READ | VM_WRITE))) { return true; } break; case EXC_STORE_PAGE_FAULT: if (!(vma->vm_flags & VM_WRITE)) { return true; } break; default: panic("%s: unhandled cause %lu", __func__, cause); } return false; } /* * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. */ void handle_page_fault(struct pt_regs *regs) { struct task_struct *tsk; struct vm_area_struct *vma; struct mm_struct *mm; unsigned long addr, cause; unsigned int flags = FAULT_FLAG_DEFAULT; int code = SEGV_MAPERR; vm_fault_t fault; cause = regs->cause; addr = regs->badaddr; tsk = current; mm = tsk->mm; if (kprobe_page_fault(regs, cause)) return; /* * Fault-in kernel-space virtual memory on-demand. * The 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. */ if ((!IS_ENABLED(CONFIG_MMU) || !IS_ENABLED(CONFIG_64BIT)) && unlikely(addr >= VMALLOC_START && addr < VMALLOC_END)) { vmalloc_fault(regs, code, addr); return; } /* Enable interrupts if they were enabled in the parent context. */ if (!regs_irqs_disabled(regs)) local_irq_enable(); /* * If we're in an interrupt, have no user context, or are running * in an atomic region, then we must not take the fault. 
*/ if (unlikely(faulthandler_disabled() || !mm)) { tsk->thread.bad_cause = cause; no_context(regs, addr); return; } if (user_mode(regs)) flags |= FAULT_FLAG_USER; if (!user_mode(regs) && addr < TASK_SIZE && unlikely(!(regs->status & SR_SUM))) { if (fixup_exception(regs)) return; die_kernel_fault("access to user memory without uaccess routines", addr, regs); } perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (cause == EXC_STORE_PAGE_FAULT) flags |= FAULT_FLAG_WRITE; else if (cause == EXC_INST_PAGE_FAULT) flags |= FAULT_FLAG_INSTRUCTION; if (!(flags & FAULT_FLAG_USER)) goto lock_mmap; vma = lock_vma_under_rcu(mm, addr); if (!vma) goto lock_mmap; if (unlikely(access_error(cause, vma))) { vma_end_read(vma); count_vm_vma_lock_event(VMA_LOCK_SUCCESS); tsk->thread.bad_cause = cause; bad_area_nosemaphore(regs, SEGV_ACCERR, addr); return; } fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs); if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) vma_end_read(vma); if (!(fault & VM_FAULT_RETRY)) { count_vm_vma_lock_event(VMA_LOCK_SUCCESS); goto done; } count_vm_vma_lock_event(VMA_LOCK_RETRY); if (fault & VM_FAULT_MAJOR) flags |= FAULT_FLAG_TRIED; if (fault_signal_pending(fault, regs)) { if (!user_mode(regs)) no_context(regs, addr); return; } lock_mmap: retry: vma = lock_mm_and_find_vma(mm, addr, regs); if (unlikely(!vma)) { tsk->thread.bad_cause = cause; bad_area_nosemaphore(regs, code, addr); return; } /* * Ok, we have a good vm_area for this memory access, so * we can handle it. */ code = SEGV_ACCERR; if (unlikely(access_error(cause, vma))) { tsk->thread.bad_cause = cause; bad_area(regs, mm, code, addr); return; } /* * If for any reason at all we could not handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(vma, addr, flags, regs); /* * If we need to retry but a fatal signal is pending, handle the * signal first. We do not need to release the mmap_lock because it * would already be released in __lock_page_or_retry in mm/filemap.c. */ if (fault_signal_pending(fault, regs)) { if (!user_mode(regs)) no_context(regs, addr); return; } /* The fault is fully completed (including releasing mmap lock) */ if (fault & VM_FAULT_COMPLETED) return; if (unlikely(fault & VM_FAULT_RETRY)) { flags |= FAULT_FLAG_TRIED; /* * No need to mmap_read_unlock(mm) as we would * have already released it in __lock_page_or_retry * in mm/filemap.c. */ goto retry; } mmap_read_unlock(mm); done: if (unlikely(fault & VM_FAULT_ERROR)) { tsk->thread.bad_cause = cause; mm_fault_error(regs, addr, fault); return; } return; } |
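The vmalloc_fault() path above synchronizes one stale top-level entry in the faulting task's page table from the reference table init_mm.pgd, then lets the access retry. The toy user-space model below is not kernel code; the table sizes, slot granularity, and function names are invented, and it only sketches the "copy the missing top-level entry from the reference table on fault" idea.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy model: a top-level table with 4 slots, each covering a fixed
 * slice of the address space. The "reference" table is always up to
 * date; a per-task copy is lazily synchronized when a fault hits a
 * slot that is populated in the reference table but not locally.
 */
#define TOP_SLOTS	4
#define SLOT_SHIFT	30	/* each slot covers 1 GiB in this model */

static uint64_t reference_table[TOP_SLOTS] = { 0x1000, 0x2000, 0x3000, 0 };
static uint64_t task_table[TOP_SLOTS];	/* starts out empty (stale) */

static bool toy_vmalloc_fault(uint64_t addr)
{
	unsigned int index = (addr >> SLOT_SHIFT) % TOP_SLOTS;

	if (!reference_table[index])
		return false;		/* genuinely bad access */

	task_table[index] = reference_table[index];	/* copy the entry */
	return true;			/* caller may retry the access */
}

int main(void)
{
	uint64_t addr = (uint64_t)1 << SLOT_SHIFT;	/* falls in slot 1 */

	printf("before fault: slot1=%llx\n", (unsigned long long)task_table[1]);
	if (toy_vmalloc_fault(addr))
		printf("after fault:  slot1=%llx\n", (unsigned long long)task_table[1]);
	return 0;
}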
// SPDX-License-Identifier: GPL-2.0-or-later /* * zswap.c - zswap driver file * * zswap is a cache that takes pages that are in the process * of being swapped out and attempts to compress and store them in a * RAM-based memory pool. This can result in a significant I/O reduction on * the swap device and, in the case where decompressing from RAM is faster * than reading from the swap device, can also improve workload performance. * * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/cpu.h> #include <linux/highmem.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/atomic.h> #include <linux/swap.h> #include <linux/crypto.h> #include <linux/scatterlist.h> #include <linux/mempolicy.h> #include <linux/mempool.h> #include <linux/zpool.h> #include <crypto/acompress.h> #include <linux/zswap.h> #include <linux/mm_types.h> #include <linux/page-flags.h> #include <linux/swapops.h> #include <linux/writeback.h> #include <linux/pagemap.h> #include <linux/workqueue.h> #include <linux/list_lru.h> #include "swap.h" #include "internal.h" /********************************* * statistics **********************************/ /* The number of compressed pages currently stored in zswap */ atomic_long_t zswap_stored_pages = ATOMIC_LONG_INIT(0); /* * The statistics below are not protected from concurrent access for * performance reasons, so they may not be 100% accurate. However, * they do provide useful information on roughly how many times a * certain event is occurring.
*/ /* Pool limit was hit (see zswap_max_pool_percent) */ static u64 zswap_pool_limit_hit; /* Pages written back when pool limit was reached */ static u64 zswap_written_back_pages; /* Store failed due to a reclaim failure after pool limit was reached */ static u64 zswap_reject_reclaim_fail; /* Store failed due to compression algorithm failure */ static u64 zswap_reject_compress_fail; /* Compressed page was too big for the allocator to (optimally) store */ static u64 zswap_reject_compress_poor; /* Load or writeback failed due to decompression failure */ static u64 zswap_decompress_fail; /* Store failed because underlying allocator could not get memory */ static u64 zswap_reject_alloc_fail; /* Store failed because the entry metadata could not be allocated (rare) */ static u64 zswap_reject_kmemcache_fail; /* Shrinker work queue */ static struct workqueue_struct *shrink_wq; /* Pool limit was hit, we need to calm down */ static bool zswap_pool_reached_full; /********************************* * tunables **********************************/ #define ZSWAP_PARAM_UNSET "" static int zswap_setup(void); /* Enable/disable zswap */ static DEFINE_STATIC_KEY_MAYBE(CONFIG_ZSWAP_DEFAULT_ON, zswap_ever_enabled); static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON); static int zswap_enabled_param_set(const char *, const struct kernel_param *); static const struct kernel_param_ops zswap_enabled_param_ops = { .set = zswap_enabled_param_set, .get = param_get_bool, }; module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644); /* Crypto compressor to use */ static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT; static int zswap_compressor_param_set(const char *, const struct kernel_param *); static const struct kernel_param_ops zswap_compressor_param_ops = { .set = zswap_compressor_param_set, .get = param_get_charp, .free = param_free_charp, }; module_param_cb(compressor, &zswap_compressor_param_ops, &zswap_compressor, 0644); /* Compressed storage zpool to use */ static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT; static int zswap_zpool_param_set(const char *, const struct kernel_param *); static const struct kernel_param_ops zswap_zpool_param_ops = { .set = zswap_zpool_param_set, .get = param_get_charp, .free = param_free_charp, }; module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644); /* The maximum percentage of memory that the compressed pool can occupy */ static unsigned int zswap_max_pool_percent = 20; module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644); /* The threshold for accepting new pages after the max_pool_percent was hit */ static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */ module_param_named(accept_threshold_percent, zswap_accept_thr_percent, uint, 0644); /* Enable/disable memory pressure-based shrinker. */ static bool zswap_shrinker_enabled = IS_ENABLED( CONFIG_ZSWAP_SHRINKER_DEFAULT_ON); module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644); bool zswap_is_enabled(void) { return zswap_enabled; } bool zswap_never_enabled(void) { return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled); } /********************************* * data structures **********************************/ struct crypto_acomp_ctx { struct crypto_acomp *acomp; struct acomp_req *req; struct crypto_wait wait; u8 *buffer; struct mutex mutex; bool is_sleepable; }; /* * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock. 
* The only case where lru_lock is not acquired while holding tree.lock is * when a zswap_entry is taken off the lru for writeback, in that case it * needs to be verified that it's still valid in the tree. */ struct zswap_pool { struct zpool *zpool; struct crypto_acomp_ctx __percpu *acomp_ctx; struct percpu_ref ref; struct list_head list; struct work_struct release_work; struct hlist_node node; char tfm_name[CRYPTO_MAX_ALG_NAME]; }; /* Global LRU lists shared by all zswap pools. */ static struct list_lru zswap_list_lru; /* The lock protects zswap_next_shrink updates. */ static DEFINE_SPINLOCK(zswap_shrink_lock); static struct mem_cgroup *zswap_next_shrink; static struct work_struct zswap_shrink_work; static struct shrinker *zswap_shrinker; /* * struct zswap_entry * * This structure contains the metadata for tracking a single compressed * page within zswap. * * swpentry - associated swap entry, the offset indexes into the red-black tree * length - the length in bytes of the compressed page data. Needed during * decompression. * referenced - true if the entry recently entered the zswap pool. Unset by the * writeback logic. The entry is only reclaimed by the writeback * logic if referenced is unset. See comments in the shrinker * section for context. * pool - the zswap_pool the entry's data is in * handle - zpool allocation handle that stores the compressed page data * objcg - the obj_cgroup that the compressed memory is charged to * lru - handle to the pool's lru used to evict pages. */ struct zswap_entry { swp_entry_t swpentry; unsigned int length; bool referenced; struct zswap_pool *pool; unsigned long handle; struct obj_cgroup *objcg; struct list_head lru; }; static struct xarray *zswap_trees[MAX_SWAPFILES]; static unsigned int nr_zswap_trees[MAX_SWAPFILES]; /* RCU-protected iteration */ static LIST_HEAD(zswap_pools); /* protects zswap_pools list modification */ static DEFINE_SPINLOCK(zswap_pools_lock); /* pool counter to provide unique names to zpool */ static atomic_t zswap_pools_count = ATOMIC_INIT(0); enum zswap_init_type { ZSWAP_UNINIT, ZSWAP_INIT_SUCCEED, ZSWAP_INIT_FAILED }; static enum zswap_init_type zswap_init_state; /* used to ensure the integrity of initialization */ static DEFINE_MUTEX(zswap_init_lock); /* init completed, but couldn't create the initial pool */ static bool zswap_has_pool; /********************************* * helpers and fwd declarations **********************************/ static inline struct xarray *swap_zswap_tree(swp_entry_t swp) { return &zswap_trees[swp_type(swp)][swp_offset(swp) >> SWAP_ADDRESS_SPACE_SHIFT]; } #define zswap_pool_debug(msg, p) \ pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \ zpool_get_type((p)->zpool)) /********************************* * pool functions **********************************/ static void __zswap_pool_empty(struct percpu_ref *ref); static struct zswap_pool *zswap_pool_create(char *type, char *compressor) { struct zswap_pool *pool; char name[38]; /* 'zswap' + 32 char (max) num + \0 */ gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; int ret, cpu; if (!zswap_has_pool) { /* if either are unset, pool initialization failed, and we * need both params to be set correctly before trying to * create a pool. 
*/ if (!strcmp(type, ZSWAP_PARAM_UNSET)) return NULL; if (!strcmp(compressor, ZSWAP_PARAM_UNSET)) return NULL; } pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) return NULL; /* unique name for each pool specifically required by zsmalloc */ snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count)); pool->zpool = zpool_create_pool(type, name, gfp); if (!pool->zpool) { pr_err("%s zpool not available\n", type); goto error; } pr_debug("using %s zpool\n", zpool_get_type(pool->zpool)); strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx); if (!pool->acomp_ctx) { pr_err("percpu alloc failed\n"); goto error; } for_each_possible_cpu(cpu) mutex_init(&per_cpu_ptr(pool->acomp_ctx, cpu)->mutex); ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); if (ret) goto error; /* being the current pool takes 1 ref; this func expects the * caller to always add the new pool as the current pool */ ret = percpu_ref_init(&pool->ref, __zswap_pool_empty, PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); if (ret) goto ref_fail; INIT_LIST_HEAD(&pool->list); zswap_pool_debug("created", pool); return pool; ref_fail: cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); error: if (pool->acomp_ctx) free_percpu(pool->acomp_ctx); if (pool->zpool) zpool_destroy_pool(pool->zpool); kfree(pool); return NULL; } static struct zswap_pool *__zswap_pool_create_fallback(void) { bool has_comp, has_zpool; has_comp = crypto_has_acomp(zswap_compressor, 0, 0); if (!has_comp && strcmp(zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) { pr_err("compressor %s not available, using default %s\n", zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT); param_free_charp(&zswap_compressor); zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT; has_comp = crypto_has_acomp(zswap_compressor, 0, 0); } if (!has_comp) { pr_err("default compressor %s not available\n", zswap_compressor); param_free_charp(&zswap_compressor); zswap_compressor = ZSWAP_PARAM_UNSET; } has_zpool = zpool_has_pool(zswap_zpool_type); if (!has_zpool && strcmp(zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT)) { pr_err("zpool %s not available, using default %s\n", zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT); param_free_charp(&zswap_zpool_type); zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT; has_zpool = zpool_has_pool(zswap_zpool_type); } if (!has_zpool) { pr_err("default zpool %s not available\n", zswap_zpool_type); param_free_charp(&zswap_zpool_type); zswap_zpool_type = ZSWAP_PARAM_UNSET; } if (!has_comp || !has_zpool) return NULL; return zswap_pool_create(zswap_zpool_type, zswap_compressor); } static void zswap_pool_destroy(struct zswap_pool *pool) { zswap_pool_debug("destroying", pool); cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); free_percpu(pool->acomp_ctx); zpool_destroy_pool(pool->zpool); kfree(pool); } static void __zswap_pool_release(struct work_struct *work) { struct zswap_pool *pool = container_of(work, typeof(*pool), release_work); synchronize_rcu(); /* nobody should have been able to get a ref... */ WARN_ON(!percpu_ref_is_zero(&pool->ref)); percpu_ref_exit(&pool->ref); /* pool is now off zswap_pools list and has no references. 
*/ zswap_pool_destroy(pool); } static struct zswap_pool *zswap_pool_current(void); static void __zswap_pool_empty(struct percpu_ref *ref) { struct zswap_pool *pool; pool = container_of(ref, typeof(*pool), ref); spin_lock_bh(&zswap_pools_lock); WARN_ON(pool == zswap_pool_current()); list_del_rcu(&pool->list); INIT_WORK(&pool->release_work, __zswap_pool_release); schedule_work(&pool->release_work); spin_unlock_bh(&zswap_pools_lock); } static int __must_check zswap_pool_tryget(struct zswap_pool *pool) { if (!pool) return 0; return percpu_ref_tryget(&pool->ref); } /* The caller must already have a reference. */ static void zswap_pool_get(struct zswap_pool *pool) { percpu_ref_get(&pool->ref); } static void zswap_pool_put(struct zswap_pool *pool) { percpu_ref_put(&pool->ref); } static struct zswap_pool *__zswap_pool_current(void) { struct zswap_pool *pool; pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list); WARN_ONCE(!pool && zswap_has_pool, "%s: no page storage pool!\n", __func__); return pool; } static struct zswap_pool *zswap_pool_current(void) { assert_spin_locked(&zswap_pools_lock); return __zswap_pool_current(); } static struct zswap_pool *zswap_pool_current_get(void) { struct zswap_pool *pool; rcu_read_lock(); pool = __zswap_pool_current(); if (!zswap_pool_tryget(pool)) pool = NULL; rcu_read_unlock(); return pool; } /* type and compressor must be null-terminated */ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) { struct zswap_pool *pool; assert_spin_locked(&zswap_pools_lock); list_for_each_entry_rcu(pool, &zswap_pools, list) { if (strcmp(pool->tfm_name, compressor)) continue; if (strcmp(zpool_get_type(pool->zpool), type)) continue; /* if we can't get it, it's about to be destroyed */ if (!zswap_pool_tryget(pool)) continue; return pool; } return NULL; } static unsigned long zswap_max_pages(void) { return totalram_pages() * zswap_max_pool_percent / 100; } static unsigned long zswap_accept_thr_pages(void) { return zswap_max_pages() * zswap_accept_thr_percent / 100; } unsigned long zswap_total_pages(void) { struct zswap_pool *pool; unsigned long total = 0; rcu_read_lock(); list_for_each_entry_rcu(pool, &zswap_pools, list) total += zpool_get_total_pages(pool->zpool); rcu_read_unlock(); return total; } static bool zswap_check_limits(void) { unsigned long cur_pages = zswap_total_pages(); unsigned long max_pages = zswap_max_pages(); if (cur_pages >= max_pages) { zswap_pool_limit_hit++; zswap_pool_reached_full = true; } else if (zswap_pool_reached_full && cur_pages <= zswap_accept_thr_pages()) { zswap_pool_reached_full = false; } return zswap_pool_reached_full; } /********************************* * param callbacks **********************************/ static bool zswap_pool_changed(const char *s, const struct kernel_param *kp) { /* no change required */ if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool) return false; return true; } /* val must be a null-terminated string */ static int __zswap_param_set(const char *val, const struct kernel_param *kp, char *type, char *compressor) { struct zswap_pool *pool, *put_pool = NULL; char *s = strstrip((char *)val); int ret = 0; bool new_pool = false; mutex_lock(&zswap_init_lock); switch (zswap_init_state) { case ZSWAP_UNINIT: /* if this is load-time (pre-init) param setting, * don't create a pool; that's done during init. 
*/ ret = param_set_charp(s, kp); break; case ZSWAP_INIT_SUCCEED: new_pool = zswap_pool_changed(s, kp); break; case ZSWAP_INIT_FAILED: pr_err("can't set param, initialization failed\n"); ret = -ENODEV; } mutex_unlock(&zswap_init_lock); /* no need to create a new pool, return directly */ if (!new_pool) return ret; if (!type) { if (!zpool_has_pool(s)) { pr_err("zpool %s not available\n", s); return -ENOENT; } type = s; } else if (!compressor) { if (!crypto_has_acomp(s, 0, 0)) { pr_err("compressor %s not available\n", s); return -ENOENT; } compressor = s; } else { WARN_ON(1); return -EINVAL; } spin_lock_bh(&zswap_pools_lock); pool = zswap_pool_find_get(type, compressor); if (pool) { zswap_pool_debug("using existing", pool); WARN_ON(pool == zswap_pool_current()); list_del_rcu(&pool->list); } spin_unlock_bh(&zswap_pools_lock); if (!pool) pool = zswap_pool_create(type, compressor); else { /* * Restore the initial ref dropped by percpu_ref_kill() * when the pool was decommissioned and switch it again * to percpu mode. */ percpu_ref_resurrect(&pool->ref); /* Drop the ref from zswap_pool_find_get(). */ zswap_pool_put(pool); } if (pool) ret = param_set_charp(s, kp); else ret = -EINVAL; spin_lock_bh(&zswap_pools_lock); if (!ret) { put_pool = zswap_pool_current(); list_add_rcu(&pool->list, &zswap_pools); zswap_has_pool = true; } else if (pool) { /* add the possibly pre-existing pool to the end of the pools * list; if it's new (and empty) then it'll be removed and * destroyed by the put after we drop the lock */ list_add_tail_rcu(&pool->list, &zswap_pools); put_pool = pool; } spin_unlock_bh(&zswap_pools_lock); if (!zswap_has_pool && !pool) { /* if initial pool creation failed, and this pool creation also * failed, maybe both compressor and zpool params were bad. * Allow changing this param, so pool creation will succeed * when the other param is changed. We already verified this * param is ok in the zpool_has_pool() or crypto_has_acomp() * checks above. */ ret = param_set_charp(s, kp); } /* drop the ref from either the old current pool, * or the new pool we failed to add */ if (put_pool) percpu_ref_kill(&put_pool->ref); return ret; } static int zswap_compressor_param_set(const char *val, const struct kernel_param *kp) { return __zswap_param_set(val, kp, zswap_zpool_type, NULL); } static int zswap_zpool_param_set(const char *val, const struct kernel_param *kp) { return __zswap_param_set(val, kp, NULL, zswap_compressor); } static int zswap_enabled_param_set(const char *val, const struct kernel_param *kp) { int ret = -ENODEV; /* if this is load-time (pre-init) param setting, only set param. */ if (system_state != SYSTEM_RUNNING) return param_set_bool(val, kp); mutex_lock(&zswap_init_lock); switch (zswap_init_state) { case ZSWAP_UNINIT: if (zswap_setup()) break; fallthrough; case ZSWAP_INIT_SUCCEED: if (!zswap_has_pool) pr_err("can't enable, no pool configured\n"); else ret = param_set_bool(val, kp); break; case ZSWAP_INIT_FAILED: pr_err("can't enable, initialization failed\n"); } mutex_unlock(&zswap_init_lock); return ret; } /********************************* * lru functions **********************************/ /* should be called under RCU */ #ifdef CONFIG_MEMCG static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry) { return entry->objcg ? 
obj_cgroup_memcg(entry->objcg) : NULL; } #else static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry) { return NULL; } #endif static inline int entry_to_nid(struct zswap_entry *entry) { return page_to_nid(virt_to_page(entry)); } static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry) { int nid = entry_to_nid(entry); struct mem_cgroup *memcg; /* * Note that it is safe to use rcu_read_lock() here, even in the face of * concurrent memcg offlining: * * 1. list_lru_add() is called before list_lru_one is dead. The * new entry will be reparented to memcg's parent's list_lru. * 2. list_lru_add() is called after list_lru_one is dead. The * new entry will be added directly to memcg's parent's list_lru. * * Similar reasoning holds for list_lru_del(). */ rcu_read_lock(); memcg = mem_cgroup_from_entry(entry); /* will always succeed */ list_lru_add(list_lru, &entry->lru, nid, memcg); rcu_read_unlock(); } static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry) { int nid = entry_to_nid(entry); struct mem_cgroup *memcg; rcu_read_lock(); memcg = mem_cgroup_from_entry(entry); /* will always succeed */ list_lru_del(list_lru, &entry->lru, nid, memcg); rcu_read_unlock(); } void zswap_lruvec_state_init(struct lruvec *lruvec) { atomic_long_set(&lruvec->zswap_lruvec_state.nr_disk_swapins, 0); } void zswap_folio_swapin(struct folio *folio) { struct lruvec *lruvec; if (folio) { lruvec = folio_lruvec(folio); atomic_long_inc(&lruvec->zswap_lruvec_state.nr_disk_swapins); } } /* * This function should be called when a memcg is being offlined. * * Since the global shrinker shrink_worker() may hold a reference * of the memcg, we must check and release the reference in * zswap_next_shrink. * * shrink_worker() must handle the case where this function releases * the reference of memcg being shrunk. */ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) { /* lock out zswap shrinker walking memcg tree */ spin_lock(&zswap_shrink_lock); if (zswap_next_shrink == memcg) { do { zswap_next_shrink = mem_cgroup_iter(NULL, zswap_next_shrink, NULL); } while (zswap_next_shrink && !mem_cgroup_online(zswap_next_shrink)); } spin_unlock(&zswap_shrink_lock); } /********************************* * zswap entry functions **********************************/ static struct kmem_cache *zswap_entry_cache; static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid) { struct zswap_entry *entry; entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid); if (!entry) return NULL; return entry; } static void zswap_entry_cache_free(struct zswap_entry *entry) { kmem_cache_free(zswap_entry_cache, entry); } /* * Carries out the common pattern of freeing and entry's zpool allocation, * freeing the entry itself, and decrementing the number of stored pages. 
*/ static void zswap_entry_free(struct zswap_entry *entry) { zswap_lru_del(&zswap_list_lru, entry); zpool_free(entry->pool->zpool, entry->handle); zswap_pool_put(entry->pool); if (entry->objcg) { obj_cgroup_uncharge_zswap(entry->objcg, entry->length); obj_cgroup_put(entry->objcg); } zswap_entry_cache_free(entry); atomic_long_dec(&zswap_stored_pages); } /********************************* * compressed storage functions **********************************/ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) { struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); struct crypto_acomp *acomp = NULL; struct acomp_req *req = NULL; u8 *buffer = NULL; int ret; buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); if (!buffer) { ret = -ENOMEM; goto fail; } acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu)); if (IS_ERR(acomp)) { pr_err("could not alloc crypto acomp %s : %ld\n", pool->tfm_name, PTR_ERR(acomp)); ret = PTR_ERR(acomp); goto fail; } req = acomp_request_alloc(acomp); if (!req) { pr_err("could not alloc crypto acomp_request %s\n", pool->tfm_name); ret = -ENOMEM; goto fail; } /* * Only hold the mutex after completing allocations, otherwise we may * recurse into zswap through reclaim and attempt to hold the mutex * again resulting in a deadlock. */ mutex_lock(&acomp_ctx->mutex); crypto_init_wait(&acomp_ctx->wait); /* * if the backend of acomp is async zip, crypto_req_done() will wakeup * crypto_wait_req(); if the backend of acomp is scomp, the callback * won't be called, crypto_wait_req() will return without blocking. */ acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &acomp_ctx->wait); acomp_ctx->buffer = buffer; acomp_ctx->acomp = acomp; acomp_ctx->is_sleepable = acomp_is_async(acomp); acomp_ctx->req = req; mutex_unlock(&acomp_ctx->mutex); return 0; fail: if (acomp) crypto_free_acomp(acomp); kfree(buffer); return ret; } static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node) { struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); struct acomp_req *req; struct crypto_acomp *acomp; u8 *buffer; if (IS_ERR_OR_NULL(acomp_ctx)) return 0; mutex_lock(&acomp_ctx->mutex); req = acomp_ctx->req; acomp = acomp_ctx->acomp; buffer = acomp_ctx->buffer; acomp_ctx->req = NULL; acomp_ctx->acomp = NULL; acomp_ctx->buffer = NULL; mutex_unlock(&acomp_ctx->mutex); /* * Do the actual freeing after releasing the mutex to avoid subtle * locking dependencies causing deadlocks. */ if (!IS_ERR_OR_NULL(req)) acomp_request_free(req); if (!IS_ERR_OR_NULL(acomp)) crypto_free_acomp(acomp); kfree(buffer); return 0; } static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool) { struct crypto_acomp_ctx *acomp_ctx; for (;;) { acomp_ctx = raw_cpu_ptr(pool->acomp_ctx); mutex_lock(&acomp_ctx->mutex); if (likely(acomp_ctx->req)) return acomp_ctx; /* * It is possible that we were migrated to a different CPU after * getting the per-CPU ctx but before the mutex was acquired. If * the old CPU got offlined, zswap_cpu_comp_dead() could have * already freed ctx->req (among other things) and set it to * NULL. Just try again on the new CPU that we ended up on. 
*/ mutex_unlock(&acomp_ctx->mutex); } } static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx) { mutex_unlock(&acomp_ctx->mutex); } static bool zswap_compress(struct page *page, struct zswap_entry *entry, struct zswap_pool *pool) { struct crypto_acomp_ctx *acomp_ctx; struct scatterlist input, output; int comp_ret = 0, alloc_ret = 0; unsigned int dlen = PAGE_SIZE; unsigned long handle; struct zpool *zpool; gfp_t gfp; u8 *dst; acomp_ctx = acomp_ctx_get_cpu_lock(pool); dst = acomp_ctx->buffer; sg_init_table(&input, 1); sg_set_page(&input, page, PAGE_SIZE, 0); /* * We need PAGE_SIZE * 2 here since there maybe over-compression case, * and hardware-accelerators may won't check the dst buffer size, so * giving the dst buffer with enough length to avoid buffer overflow. */ sg_init_one(&output, dst, PAGE_SIZE * 2); acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen); /* * it maybe looks a little bit silly that we send an asynchronous request, * then wait for its completion synchronously. This makes the process look * synchronous in fact. * Theoretically, acomp supports users send multiple acomp requests in one * acomp instance, then get those requests done simultaneously. but in this * case, zswap actually does store and load page by page, there is no * existing method to send the second page before the first page is done * in one thread doing zwap. * but in different threads running on different cpu, we have different * acomp instance, so multiple threads can do (de)compression in parallel. */ comp_ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait); dlen = acomp_ctx->req->dlen; if (comp_ret) goto unlock; zpool = pool->zpool; gfp = GFP_NOWAIT | __GFP_NORETRY | __GFP_HIGHMEM | __GFP_MOVABLE; alloc_ret = zpool_malloc(zpool, dlen, gfp, &handle, page_to_nid(page)); if (alloc_ret) goto unlock; zpool_obj_write(zpool, handle, dst, dlen); entry->handle = handle; entry->length = dlen; unlock: if (comp_ret == -ENOSPC || alloc_ret == -ENOSPC) zswap_reject_compress_poor++; else if (comp_ret) zswap_reject_compress_fail++; else if (alloc_ret) zswap_reject_alloc_fail++; acomp_ctx_put_unlock(acomp_ctx); return comp_ret == 0 && alloc_ret == 0; } static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio) { struct zpool *zpool = entry->pool->zpool; struct scatterlist input, output; struct crypto_acomp_ctx *acomp_ctx; int decomp_ret, dlen; u8 *src, *obj; acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool); obj = zpool_obj_read_begin(zpool, entry->handle, acomp_ctx->buffer); /* * zpool_obj_read_begin() might return a kmap address of highmem when * acomp_ctx->buffer is not used. However, sg_init_one() does not * handle highmem addresses, so copy the object to acomp_ctx->buffer. 
*/ if (virt_addr_valid(obj)) { src = obj; } else { WARN_ON_ONCE(obj == acomp_ctx->buffer); memcpy(acomp_ctx->buffer, obj, entry->length); src = acomp_ctx->buffer; } sg_init_one(&input, src, entry->length); sg_init_table(&output, 1); sg_set_folio(&output, folio, PAGE_SIZE, 0); acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE); decomp_ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait); dlen = acomp_ctx->req->dlen; zpool_obj_read_end(zpool, entry->handle, obj); acomp_ctx_put_unlock(acomp_ctx); if (!decomp_ret && dlen == PAGE_SIZE) return true; zswap_decompress_fail++; pr_alert_ratelimited("Decompression error from zswap (%d:%lu %s %u->%d)\n", swp_type(entry->swpentry), swp_offset(entry->swpentry), entry->pool->tfm_name, entry->length, dlen); return false; } /********************************* * writeback code **********************************/ /* * Attempts to free an entry by adding a folio to the swap cache, * decompressing the entry data into the folio, and issuing a * bio write to write the folio back to the swap device. * * This can be thought of as a "resumed writeback" of the folio * to the swap device. We are basically resuming the same swap * writeback path that was intercepted with the zswap_store() * in the first place. After the folio has been decompressed into * the swap cache, the compressed version stored by zswap can be * freed. */ static int zswap_writeback_entry(struct zswap_entry *entry, swp_entry_t swpentry) { struct xarray *tree; pgoff_t offset = swp_offset(swpentry); struct folio *folio; struct mempolicy *mpol; bool folio_was_allocated; struct swap_info_struct *si; struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, }; int ret = 0; /* try to allocate swap cache folio */ si = get_swap_device(swpentry); if (!si) return -EEXIST; mpol = get_task_policy(current); folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, NO_INTERLEAVE_INDEX, &folio_was_allocated, true); put_swap_device(si); if (!folio) return -ENOMEM; /* * Found an existing folio, we raced with swapin or concurrent * shrinker. We generally writeback cold folios from zswap, and * swapin means the folio just became hot, so skip this folio. * For unlikely concurrent shrinker case, it will be unlinked * and freed when invalidated by the concurrent shrinker anyway. */ if (!folio_was_allocated) { ret = -EEXIST; goto out; } /* * folio is locked, and the swapcache is now secured against * concurrent swapping to and from the slot, and concurrent * swapoff so we can safely dereference the zswap tree here. * Verify that the swap entry hasn't been invalidated and recycled * behind our backs, to avoid overwriting a new swap folio with * old compressed data. Only when this is successful can the entry * be dereferenced. 
*/ tree = swap_zswap_tree(swpentry); if (entry != xa_load(tree, offset)) { ret = -ENOMEM; goto out; } if (!zswap_decompress(entry, folio)) { ret = -EIO; goto out; } xa_erase(tree, offset); count_vm_event(ZSWPWB); if (entry->objcg) count_objcg_events(entry->objcg, ZSWPWB, 1); zswap_entry_free(entry); /* folio is up to date */ folio_mark_uptodate(folio); /* move it to the tail of the inactive list after end_writeback */ folio_set_reclaim(folio); /* start writeback */ __swap_writepage(folio, &wbc); out: if (ret && ret != -EEXIST) { delete_from_swap_cache(folio); folio_unlock(folio); } folio_put(folio); return ret; } /********************************* * shrinker functions **********************************/ /* * The dynamic shrinker is modulated by the following factors: * * 1. Each zswap entry has a referenced bit, which the shrinker unsets (giving * the entry a second chance) before rotating it in the LRU list. If the * entry is considered again by the shrinker, with its referenced bit unset, * it is written back. The writeback rate as a result is dynamically * adjusted by the pool activities - if the pool is dominated by new entries * (i.e lots of recent zswapouts), these entries will be protected and * the writeback rate will slow down. On the other hand, if the pool has a * lot of stagnant entries, these entries will be reclaimed immediately, * effectively increasing the writeback rate. * * 2. Swapins counter: If we observe swapins, it is a sign that we are * overshrinking and should slow down. We maintain a swapins counter, which * is consumed and subtract from the number of eligible objects on the LRU * in zswap_shrinker_count(). * * 3. Compression ratio. The better the workload compresses, the less gains we * can expect from writeback. We scale down the number of objects available * for reclaim by this ratio. */ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l, void *arg) { struct zswap_entry *entry = container_of(item, struct zswap_entry, lru); bool *encountered_page_in_swapcache = (bool *)arg; swp_entry_t swpentry; enum lru_status ret = LRU_REMOVED_RETRY; int writeback_result; /* * Second chance algorithm: if the entry has its referenced bit set, give it * a second chance. Only clear the referenced bit and rotate it in the * zswap's LRU list. */ if (entry->referenced) { entry->referenced = false; return LRU_ROTATE; } /* * As soon as we drop the LRU lock, the entry can be freed by * a concurrent invalidation. This means the following: * * 1. We extract the swp_entry_t to the stack, allowing * zswap_writeback_entry() to pin the swap entry and * then validate the zwap entry against that swap entry's * tree using pointer value comparison. Only when that * is successful can the entry be dereferenced. * * 2. Usually, objects are taken off the LRU for reclaim. In * this case this isn't possible, because if reclaim fails * for whatever reason, we have no means of knowing if the * entry is alive to put it back on the LRU. * * So rotate it before dropping the lock. If the entry is * written back or invalidated, the free path will unlink * it. For failures, rotation is the right thing as well. * * Temporary failures, where the same entry should be tried * again immediately, almost never happen for this shrinker. * We don't do any trylocking; -ENOMEM comes closest, * but that's extremely rare and doesn't happen spuriously * either. Don't bother distinguishing this case. 
*/ list_move_tail(item, &l->list); /* * Once the lru lock is dropped, the entry might get freed. The * swpentry is copied to the stack, and entry isn't deref'd again * until the entry is verified to still be alive in the tree. */ swpentry = entry->swpentry; /* * It's safe to drop the lock here because we return either * LRU_REMOVED_RETRY, LRU_RETRY or LRU_STOP. */ spin_unlock(&l->lock); writeback_result = zswap_writeback_entry(entry, swpentry); if (writeback_result) { zswap_reject_reclaim_fail++; ret = LRU_RETRY; /* * Encountering a page already in swap cache is a sign that we are shrinking * into the warmer region. We should terminate shrinking (if we're in the dynamic * shrinker context). */ if (writeback_result == -EEXIST && encountered_page_in_swapcache) { ret = LRU_STOP; *encountered_page_in_swapcache = true; } } else { zswap_written_back_pages++; } return ret; } static unsigned long zswap_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { unsigned long shrink_ret; bool encountered_page_in_swapcache = false; if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(sc->memcg)) { sc->nr_scanned = 0; return SHRINK_STOP; } shrink_ret = list_lru_shrink_walk(&zswap_list_lru, sc, &shrink_memcg_cb, &encountered_page_in_swapcache); if (encountered_page_in_swapcache) return SHRINK_STOP; return shrink_ret ? shrink_ret : SHRINK_STOP; } static unsigned long zswap_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { struct mem_cgroup *memcg = sc->memcg; struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid)); atomic_long_t *nr_disk_swapins = &lruvec->zswap_lruvec_state.nr_disk_swapins; unsigned long nr_backing, nr_stored, nr_freeable, nr_disk_swapins_cur, nr_remain; if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg)) return 0; /* * The shrinker resumes swap writeback, which will enter block * and may enter fs. XXX: Harmonize with vmscan.c __GFP_FS * rules (may_enter_fs()), which apply on a per-folio basis. */ if (!gfp_has_io_fs(sc->gfp_mask)) return 0; /* * For memcg, use the cgroup-wide ZSWAP stats since we don't * have them per-node and thus per-lruvec. Careful if memcg is * runtime-disabled: we can get sc->memcg == NULL, which is ok * for the lruvec, but not for memcg_page_state(). * * Without memcg, use the zswap pool-wide metrics. */ if (!mem_cgroup_disabled()) { mem_cgroup_flush_stats(memcg); nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); } else { nr_backing = zswap_total_pages(); nr_stored = atomic_long_read(&zswap_stored_pages); } if (!nr_stored) return 0; nr_freeable = list_lru_shrink_count(&zswap_list_lru, sc); if (!nr_freeable) return 0; /* * Subtract from the lru size the number of pages that are recently swapped * in from disk. The idea is that had we protect the zswap's LRU by this * amount of pages, these disk swapins would not have happened. */ nr_disk_swapins_cur = atomic_long_read(nr_disk_swapins); do { if (nr_freeable >= nr_disk_swapins_cur) nr_remain = 0; else nr_remain = nr_disk_swapins_cur - nr_freeable; } while (!atomic_long_try_cmpxchg( nr_disk_swapins, &nr_disk_swapins_cur, nr_remain)); nr_freeable -= nr_disk_swapins_cur - nr_remain; if (!nr_freeable) return 0; /* * Scale the number of freeable pages by the memory saving factor. * This ensures that the better zswap compresses memory, the fewer * pages we will evict to swap (as it will otherwise incur IO for * relatively small memory saving). 
*/ return mult_frac(nr_freeable, nr_backing, nr_stored); } static struct shrinker *zswap_alloc_shrinker(void) { struct shrinker *shrinker; shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap"); if (!shrinker) return NULL; shrinker->scan_objects = zswap_shrinker_scan; shrinker->count_objects = zswap_shrinker_count; shrinker->batch = 0; shrinker->seeks = DEFAULT_SEEKS; return shrinker; } static int shrink_memcg(struct mem_cgroup *memcg) { int nid, shrunk = 0, scanned = 0; if (!mem_cgroup_zswap_writeback_enabled(memcg)) return -ENOENT; /* * Skip zombies because their LRUs are reparented and we would be * reclaiming from the parent instead of the dead memcg. */ if (memcg && !mem_cgroup_online(memcg)) return -ENOENT; for_each_node_state(nid, N_NORMAL_MEMORY) { unsigned long nr_to_walk = 1; shrunk += list_lru_walk_one(&zswap_list_lru, nid, memcg, &shrink_memcg_cb, NULL, &nr_to_walk); scanned += 1 - nr_to_walk; } if (!scanned) return -ENOENT; return shrunk ? 0 : -EAGAIN; } static void shrink_worker(struct work_struct *w) { struct mem_cgroup *memcg; int ret, failures = 0, attempts = 0; unsigned long thr; /* Reclaim down to the accept threshold */ thr = zswap_accept_thr_pages(); /* * Global reclaim will select cgroup in a round-robin fashion from all * online memcgs, but memcgs that have no pages in zswap and * writeback-disabled memcgs (memory.zswap.writeback=0) are not * candidates for shrinking. * * Shrinking will be aborted if we encounter the following * MAX_RECLAIM_RETRIES times: * - No writeback-candidate memcgs found in a memcg tree walk. * - Shrinking a writeback-candidate memcg failed. * * We save iteration cursor memcg into zswap_next_shrink, * which can be modified by the offline memcg cleaner * zswap_memcg_offline_cleanup(). * * Since the offline cleaner is called only once, we cannot leave an * offline memcg reference in zswap_next_shrink. * We can rely on the cleaner only if we get online memcg under lock. * * If we get an offline memcg, we cannot determine if the cleaner has * already been called or will be called later. We must put back the * reference before returning from this function. Otherwise, the * offline memcg left in zswap_next_shrink will hold the reference * until the next run of shrink_worker(). */ do { /* * Start shrinking from the next memcg after zswap_next_shrink. * When the offline cleaner has already advanced the cursor, * advancing the cursor here overlooks one memcg, but this * should be negligibly rare. * * If we get an online memcg, keep the extra reference in case * the original one obtained by mem_cgroup_iter() is dropped by * zswap_memcg_offline_cleanup() while we are shrinking the * memcg. */ spin_lock(&zswap_shrink_lock); do { memcg = mem_cgroup_iter(NULL, zswap_next_shrink, NULL); zswap_next_shrink = memcg; } while (memcg && !mem_cgroup_tryget_online(memcg)); spin_unlock(&zswap_shrink_lock); if (!memcg) { /* * Continue shrinking without incrementing failures if * we found candidate memcgs in the last tree walk. */ if (!attempts && ++failures == MAX_RECLAIM_RETRIES) break; attempts = 0; goto resched; } ret = shrink_memcg(memcg); /* drop the extra reference */ mem_cgroup_put(memcg); /* * There are no writeback-candidate pages in the memcg. * This is not an issue as long as we can find another memcg * with pages in zswap. Skip this without incrementing attempts * and failures. 
*/ if (ret == -ENOENT) continue; ++attempts; if (ret && ++failures == MAX_RECLAIM_RETRIES) break; resched: cond_resched(); } while (zswap_total_pages() > thr); } /********************************* * main API **********************************/ static bool zswap_store_page(struct page *page, struct obj_cgroup *objcg, struct zswap_pool *pool) { swp_entry_t page_swpentry = page_swap_entry(page); struct zswap_entry *entry, *old; /* allocate entry */ entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page)); if (!entry) { zswap_reject_kmemcache_fail++; return false; } if (!zswap_compress(page, entry, pool)) goto compress_failed; old = xa_store(swap_zswap_tree(page_swpentry), swp_offset(page_swpentry), entry, GFP_KERNEL); if (xa_is_err(old)) { int err = xa_err(old); WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err); zswap_reject_alloc_fail++; goto store_failed; } /* * We may have had an existing entry that became stale when * the folio was redirtied and now the new version is being * swapped out. Get rid of the old. */ if (old) zswap_entry_free(old); /* * The entry is successfully compressed and stored in the tree, there is * no further possibility of failure. Grab refs to the pool and objcg, * charge zswap memory, and increment zswap_stored_pages. * The opposite actions will be performed by zswap_entry_free() * when the entry is removed from the tree. */ zswap_pool_get(pool); if (objcg) { obj_cgroup_get(objcg); obj_cgroup_charge_zswap(objcg, entry->length); } atomic_long_inc(&zswap_stored_pages); /* * We finish initializing the entry while it's already in xarray. * This is safe because: * * 1. Concurrent stores and invalidations are excluded by folio lock. * * 2. Writeback is excluded by the entry not being on the LRU yet. * The publishing order matters to prevent writeback from seeing * an incoherent entry. 
*/ entry->pool = pool; entry->swpentry = page_swpentry; entry->objcg = objcg; entry->referenced = true; if (entry->length) { INIT_LIST_HEAD(&entry->lru); zswap_lru_add(&zswap_list_lru, entry); } return true; store_failed: zpool_free(pool->zpool, entry->handle); compress_failed: zswap_entry_cache_free(entry); return false; } bool zswap_store(struct folio *folio) { long nr_pages = folio_nr_pages(folio); swp_entry_t swp = folio->swap; struct obj_cgroup *objcg = NULL; struct mem_cgroup *memcg = NULL; struct zswap_pool *pool; bool ret = false; long index; VM_WARN_ON_ONCE(!folio_test_locked(folio)); VM_WARN_ON_ONCE(!folio_test_swapcache(folio)); if (!zswap_enabled) goto check_old; objcg = get_obj_cgroup_from_folio(folio); if (objcg && !obj_cgroup_may_zswap(objcg)) { memcg = get_mem_cgroup_from_objcg(objcg); if (shrink_memcg(memcg)) { mem_cgroup_put(memcg); goto put_objcg; } mem_cgroup_put(memcg); } if (zswap_check_limits()) goto put_objcg; pool = zswap_pool_current_get(); if (!pool) goto put_objcg; if (objcg) { memcg = get_mem_cgroup_from_objcg(objcg); if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) { mem_cgroup_put(memcg); goto put_pool; } mem_cgroup_put(memcg); } for (index = 0; index < nr_pages; ++index) { struct page *page = folio_page(folio, index); if (!zswap_store_page(page, objcg, pool)) goto put_pool; } if (objcg) count_objcg_events(objcg, ZSWPOUT, nr_pages); count_vm_events(ZSWPOUT, nr_pages); ret = true; put_pool: zswap_pool_put(pool); put_objcg: obj_cgroup_put(objcg); if (!ret && zswap_pool_reached_full) queue_work(shrink_wq, &zswap_shrink_work); check_old: /* * If the zswap store fails or zswap is disabled, we must invalidate * the possibly stale entries which were previously stored at the * offsets corresponding to each page of the folio. Otherwise, * writeback could overwrite the new data in the swapfile. */ if (!ret) { unsigned type = swp_type(swp); pgoff_t offset = swp_offset(swp); struct zswap_entry *entry; struct xarray *tree; for (index = 0; index < nr_pages; ++index) { tree = swap_zswap_tree(swp_entry(type, offset + index)); entry = xa_erase(tree, offset + index); if (entry) zswap_entry_free(entry); } } return ret; } /** * zswap_load() - load a folio from zswap * @folio: folio to load * * Return: 0 on success, with the folio unlocked and marked up-to-date, or one * of the following error codes: * * -EIO: if the swapped out content was in zswap, but could not be loaded * into the page due to a decompression failure. The folio is unlocked, but * NOT marked up-to-date, so that an IO error is emitted (e.g. do_swap_page() * will SIGBUS). * * -EINVAL: if the swapped out content was in zswap, but the page belongs * to a large folio, which is not supported by zswap. The folio is unlocked, * but NOT marked up-to-date, so that an IO error is emitted (e.g. * do_swap_page() will SIGBUS). * * -ENOENT: if the swapped out content was not in zswap. The folio remains * locked on return. */ int zswap_load(struct folio *folio) { swp_entry_t swp = folio->swap; pgoff_t offset = swp_offset(swp); bool swapcache = folio_test_swapcache(folio); struct xarray *tree = swap_zswap_tree(swp); struct zswap_entry *entry; VM_WARN_ON_ONCE(!folio_test_locked(folio)); if (zswap_never_enabled()) return -ENOENT; /* * Large folios should not be swapped in while zswap is being used, as * they are not properly handled. Zswap does not properly load large * folios, and a large folio may only be partially in zswap. 
*/ if (WARN_ON_ONCE(folio_test_large(folio))) { folio_unlock(folio); return -EINVAL; } entry = xa_load(tree, offset); if (!entry) return -ENOENT; if (!zswap_decompress(entry, folio)) { folio_unlock(folio); return -EIO; } folio_mark_uptodate(folio); count_vm_event(ZSWPIN); if (entry->objcg) count_objcg_events(entry->objcg, ZSWPIN, 1); /* * When reading into the swapcache, invalidate our entry. The * swapcache can be the authoritative owner of the page and * its mappings, and the pressure that results from having two * in-memory copies outweighs any benefits of caching the * compression work. * * (Most swapins go through the swapcache. The notable * exception is the singleton fault on SWP_SYNCHRONOUS_IO * files, which reads into a private page and may free it if * the fault fails. We remain the primary owner of the entry.) */ if (swapcache) { folio_mark_dirty(folio); xa_erase(tree, offset); zswap_entry_free(entry); } folio_unlock(folio); return 0; } void zswap_invalidate(swp_entry_t swp) { pgoff_t offset = swp_offset(swp); struct xarray *tree = swap_zswap_tree(swp); struct zswap_entry *entry; if (xa_empty(tree)) return; entry = xa_erase(tree, offset); if (entry) zswap_entry_free(entry); } int zswap_swapon(int type, unsigned long nr_pages) { struct xarray *trees, *tree; unsigned int nr, i; nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES); trees = kvcalloc(nr, sizeof(*tree), GFP_KERNEL); if (!trees) { pr_err("alloc failed, zswap disabled for swap type %d\n", type); return -ENOMEM; } for (i = 0; i < nr; i++) xa_init(trees + i); nr_zswap_trees[type] = nr; zswap_trees[type] = trees; return 0; } void zswap_swapoff(int type) { struct xarray *trees = zswap_trees[type]; unsigned int i; if (!trees) return; /* try_to_unuse() invalidated all the entries already */ for (i = 0; i < nr_zswap_trees[type]; i++) WARN_ON_ONCE(!xa_empty(trees + i)); kvfree(trees); nr_zswap_trees[type] = 0; zswap_trees[type] = NULL; } /********************************* * debugfs functions **********************************/ #ifdef CONFIG_DEBUG_FS #include <linux/debugfs.h> static struct dentry *zswap_debugfs_root; static int debugfs_get_total_size(void *data, u64 *val) { *val = zswap_total_pages() * PAGE_SIZE; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(total_size_fops, debugfs_get_total_size, NULL, "%llu\n"); static int debugfs_get_stored_pages(void *data, u64 *val) { *val = atomic_long_read(&zswap_stored_pages); return 0; } DEFINE_DEBUGFS_ATTRIBUTE(stored_pages_fops, debugfs_get_stored_pages, NULL, "%llu\n"); static int zswap_debugfs_init(void) { if (!debugfs_initialized()) return -ENODEV; zswap_debugfs_root = debugfs_create_dir("zswap", NULL); debugfs_create_u64("pool_limit_hit", 0444, zswap_debugfs_root, &zswap_pool_limit_hit); debugfs_create_u64("reject_reclaim_fail", 0444, zswap_debugfs_root, &zswap_reject_reclaim_fail); debugfs_create_u64("reject_alloc_fail", 0444, zswap_debugfs_root, &zswap_reject_alloc_fail); debugfs_create_u64("reject_kmemcache_fail", 0444, zswap_debugfs_root, &zswap_reject_kmemcache_fail); debugfs_create_u64("reject_compress_fail", 0444, zswap_debugfs_root, &zswap_reject_compress_fail); debugfs_create_u64("reject_compress_poor", 0444, zswap_debugfs_root, &zswap_reject_compress_poor); debugfs_create_u64("decompress_fail", 0444, zswap_debugfs_root, &zswap_decompress_fail); debugfs_create_u64("written_back_pages", 0444, zswap_debugfs_root, &zswap_written_back_pages); debugfs_create_file("pool_total_size", 0444, zswap_debugfs_root, NULL, &total_size_fops); debugfs_create_file("stored_pages", 0444, 
zswap_debugfs_root, NULL, &stored_pages_fops); return 0; } #else static int zswap_debugfs_init(void) { return 0; } #endif /********************************* * module init and exit **********************************/ static int zswap_setup(void) { struct zswap_pool *pool; int ret; zswap_entry_cache = KMEM_CACHE(zswap_entry, 0); if (!zswap_entry_cache) { pr_err("entry cache creation failed\n"); goto cache_fail; } ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE, "mm/zswap_pool:prepare", zswap_cpu_comp_prepare, zswap_cpu_comp_dead); if (ret) goto hp_fail; shrink_wq = alloc_workqueue("zswap-shrink", WQ_UNBOUND|WQ_MEM_RECLAIM, 1); if (!shrink_wq) goto shrink_wq_fail; zswap_shrinker = zswap_alloc_shrinker(); if (!zswap_shrinker) goto shrinker_fail; if (list_lru_init_memcg(&zswap_list_lru, zswap_shrinker)) goto lru_fail; shrinker_register(zswap_shrinker); INIT_WORK(&zswap_shrink_work, shrink_worker); pool = __zswap_pool_create_fallback(); if (pool) { pr_info("loaded using pool %s/%s\n", pool->tfm_name, zpool_get_type(pool->zpool)); list_add(&pool->list, &zswap_pools); zswap_has_pool = true; static_branch_enable(&zswap_ever_enabled); } else { pr_err("pool creation failed\n"); zswap_enabled = false; } if (zswap_debugfs_init()) pr_warn("debugfs initialization failed\n"); zswap_init_state = ZSWAP_INIT_SUCCEED; return 0; lru_fail: shrinker_free(zswap_shrinker); shrinker_fail: destroy_workqueue(shrink_wq); shrink_wq_fail: cpuhp_remove_multi_state(CPUHP_MM_ZSWP_POOL_PREPARE); hp_fail: kmem_cache_destroy(zswap_entry_cache); cache_fail: /* if built-in, we aren't unloaded on failure; don't allow use */ zswap_init_state = ZSWAP_INIT_FAILED; zswap_enabled = false; return -ENOMEM; } static int __init zswap_init(void) { if (!zswap_enabled) return 0; return zswap_setup(); } /* must be late so crypto has time to come up */ late_initcall(zswap_init); MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>"); MODULE_DESCRIPTION("Compressed cache for swap pages"); |
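/*
 * Illustrative userspace sketch (not kernel code, hypothetical helper names):
 * the two adjustments zswap_shrinker_count() applies to the freeable-object
 * count, as described in the shrinker comments above -- first protect the LRU
 * by the number of recent disk swapins, then scale by the memory saving
 * factor (nr_backing / nr_stored). The mult_frac() below is a simplified
 * stand-in for the kernel macro of the same name.
 */
#include <stdio.h>

static unsigned long mult_frac(unsigned long x, unsigned long numer,
			       unsigned long denom)
{
	unsigned long quot = x / denom;
	unsigned long rem = x % denom;

	return quot * numer + rem * numer / denom;
}

int main(void)
{
	unsigned long nr_freeable = 1000;	/* entries on the zswap LRU */
	unsigned long nr_disk_swapins = 200;	/* recent swapins from disk */
	unsigned long nr_backing = 300;		/* pages backing the pool */
	unsigned long nr_stored = 1000;		/* pages stored in the pool */

	/* Protect the LRU by the amount of recently observed disk swapins. */
	nr_freeable -= nr_disk_swapins < nr_freeable ? nr_disk_swapins : nr_freeable;

	/* Better compression ratio -> fewer objects reported as freeable. */
	printf("reported freeable: %lu\n",
	       mult_frac(nr_freeable, nr_backing, nr_stored));
	return 0;
}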
| 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 | // SPDX-License-Identifier: GPL-2.0 /* * Driver for Phoenix RC Flight Controller Adapter * * Copyright (C) 2018 Marcus Folkesson <marcus.folkesson@gmail.com> */ #include <linux/cleanup.h> #include <linux/errno.h> #include <linux/input.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/input.h> #define PXRC_VENDOR_ID 0x1781 #define PXRC_PRODUCT_ID 0x0898 struct pxrc { struct input_dev *input; struct usb_interface *intf; struct urb *urb; struct mutex pm_mutex; bool is_open; char phys[64]; }; static void pxrc_usb_irq(struct urb *urb) { struct pxrc *pxrc = urb->context; u8 *data = urb->transfer_buffer; int error; switch (urb->status) { case 0: /* success */ break; case -ETIME: /* this urb is timing out */ dev_dbg(&pxrc->intf->dev, "%s - urb timed out - was the device unplugged?\n", __func__); return; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -EPIPE: /* this urb is terminated, clean up */ dev_dbg(&pxrc->intf->dev, "%s - urb shutting down with status: %d\n", __func__, urb->status); return; default: dev_dbg(&pxrc->intf->dev, "%s - nonzero urb status received: %d\n", __func__, urb->status); goto exit; } if (urb->actual_length == 8) { input_report_abs(pxrc->input, ABS_X, data[0]); input_report_abs(pxrc->input, ABS_Y, data[2]); input_report_abs(pxrc->input, ABS_RX, data[3]); input_report_abs(pxrc->input, ABS_RY, data[4]); input_report_abs(pxrc->input, ABS_RUDDER, data[5]); input_report_abs(pxrc->input, ABS_THROTTLE, data[6]); input_report_abs(pxrc->input, ABS_MISC, data[7]); input_report_key(pxrc->input, BTN_A, data[1]); } exit: /* Resubmit to fetch new fresh URBs */ error = usb_submit_urb(urb, GFP_ATOMIC); if (error && error != -EPERM) dev_err(&pxrc->intf->dev, "%s - usb_submit_urb failed with result: %d", __func__, error); } static int pxrc_open(struct input_dev *input) { struct pxrc *pxrc = input_get_drvdata(input); int error; guard(mutex)(&pxrc->pm_mutex); error = usb_submit_urb(pxrc->urb, GFP_KERNEL); if (error) { dev_err(&pxrc->intf->dev, "%s - usb_submit_urb failed, error: %d\n", __func__, error); return -EIO; } pxrc->is_open = true; return 0; } static void pxrc_close(struct input_dev *input) { struct pxrc *pxrc = input_get_drvdata(input); guard(mutex)(&pxrc->pm_mutex); usb_kill_urb(pxrc->urb); pxrc->is_open = false; } static void pxrc_free_urb(void *_pxrc) { struct pxrc *pxrc = _pxrc; usb_free_urb(pxrc->urb); } static int pxrc_probe(struct usb_interface *intf, const struct 
usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct pxrc *pxrc; struct usb_endpoint_descriptor *epirq; size_t xfer_size; void *xfer_buf; int error; /* * Locate the endpoint information. This device only has an * interrupt endpoint. */ error = usb_find_common_endpoints(intf->cur_altsetting, NULL, NULL, &epirq, NULL); if (error) { dev_err(&intf->dev, "Could not find endpoint\n"); return error; } pxrc = devm_kzalloc(&intf->dev, sizeof(*pxrc), GFP_KERNEL); if (!pxrc) return -ENOMEM; mutex_init(&pxrc->pm_mutex); pxrc->intf = intf; usb_set_intfdata(pxrc->intf, pxrc); xfer_size = usb_endpoint_maxp(epirq); xfer_buf = devm_kmalloc(&intf->dev, xfer_size, GFP_KERNEL); if (!xfer_buf) return -ENOMEM; pxrc->urb = usb_alloc_urb(0, GFP_KERNEL); if (!pxrc->urb) return -ENOMEM; error = devm_add_action_or_reset(&intf->dev, pxrc_free_urb, pxrc); if (error) return error; usb_fill_int_urb(pxrc->urb, udev, usb_rcvintpipe(udev, epirq->bEndpointAddress), xfer_buf, xfer_size, pxrc_usb_irq, pxrc, 1); pxrc->input = devm_input_allocate_device(&intf->dev); if (!pxrc->input) { dev_err(&intf->dev, "couldn't allocate input device\n"); return -ENOMEM; } pxrc->input->name = "PXRC Flight Controller Adapter"; usb_make_path(udev, pxrc->phys, sizeof(pxrc->phys)); strlcat(pxrc->phys, "/input0", sizeof(pxrc->phys)); pxrc->input->phys = pxrc->phys; usb_to_input_id(udev, &pxrc->input->id); pxrc->input->open = pxrc_open; pxrc->input->close = pxrc_close; input_set_capability(pxrc->input, EV_KEY, BTN_A); input_set_abs_params(pxrc->input, ABS_X, 0, 255, 0, 0); input_set_abs_params(pxrc->input, ABS_Y, 0, 255, 0, 0); input_set_abs_params(pxrc->input, ABS_RX, 0, 255, 0, 0); input_set_abs_params(pxrc->input, ABS_RY, 0, 255, 0, 0); input_set_abs_params(pxrc->input, ABS_RUDDER, 0, 255, 0, 0); input_set_abs_params(pxrc->input, ABS_THROTTLE, 0, 255, 0, 0); input_set_abs_params(pxrc->input, ABS_MISC, 0, 255, 0, 0); input_set_drvdata(pxrc->input, pxrc); error = input_register_device(pxrc->input); if (error) return error; return 0; } static void pxrc_disconnect(struct usb_interface *intf) { /* All driver resources are devm-managed. 
*/ } static int pxrc_suspend(struct usb_interface *intf, pm_message_t message) { struct pxrc *pxrc = usb_get_intfdata(intf); guard(mutex)(&pxrc->pm_mutex); if (pxrc->is_open) usb_kill_urb(pxrc->urb); return 0; } static int pxrc_resume(struct usb_interface *intf) { struct pxrc *pxrc = usb_get_intfdata(intf); guard(mutex)(&pxrc->pm_mutex); if (pxrc->is_open && usb_submit_urb(pxrc->urb, GFP_KERNEL) < 0) return -EIO; return 0; } static int pxrc_pre_reset(struct usb_interface *intf) { struct pxrc *pxrc = usb_get_intfdata(intf); mutex_lock(&pxrc->pm_mutex); usb_kill_urb(pxrc->urb); return 0; } static int pxrc_post_reset(struct usb_interface *intf) { struct pxrc *pxrc = usb_get_intfdata(intf); int retval = 0; if (pxrc->is_open && usb_submit_urb(pxrc->urb, GFP_KERNEL) < 0) retval = -EIO; mutex_unlock(&pxrc->pm_mutex); return retval; } static int pxrc_reset_resume(struct usb_interface *intf) { return pxrc_resume(intf); } static const struct usb_device_id pxrc_table[] = { { USB_DEVICE(PXRC_VENDOR_ID, PXRC_PRODUCT_ID) }, { } }; MODULE_DEVICE_TABLE(usb, pxrc_table); static struct usb_driver pxrc_driver = { .name = "pxrc", .probe = pxrc_probe, .disconnect = pxrc_disconnect, .id_table = pxrc_table, .suspend = pxrc_suspend, .resume = pxrc_resume, .pre_reset = pxrc_pre_reset, .post_reset = pxrc_post_reset, .reset_resume = pxrc_reset_resume, }; module_usb_driver(pxrc_driver); MODULE_AUTHOR("Marcus Folkesson <marcus.folkesson@gmail.com>"); MODULE_DESCRIPTION("PhoenixRC Flight Controller Adapter"); MODULE_LICENSE("GPL v2"); |
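/*
 * Illustrative userspace sketch (not kernel code): the scope-based locking
 * used above via guard(mutex)(&pxrc->pm_mutex) from <linux/cleanup.h>,
 * rebuilt with the compiler's cleanup attribute and pthreads. The names
 * mutex_guard() and guard_release() are invented for this example; the point
 * is only that the lock is dropped automatically on every return path, which
 * is why pxrc_open() can return early without an explicit unlock.
 */
#include <pthread.h>
#include <stdio.h>

static void guard_release(pthread_mutex_t **m)
{
	pthread_mutex_unlock(*m);
}

#define mutex_guard(m) \
	pthread_mutex_t *__guard __attribute__((cleanup(guard_release))) = \
		({ pthread_mutex_lock(m); (m); })

static pthread_mutex_t pm_mutex = PTHREAD_MUTEX_INITIALIZER;
static int is_open;

static int example_open(void)
{
	mutex_guard(&pm_mutex);

	if (is_open)
		return -1;	/* mutex released here ... */

	is_open = 1;
	return 0;		/* ... and here, without explicit unlock calls */
}

int main(void)
{
	printf("first open: %d, second open: %d\n",
	       example_open(), example_open());
	return 0;
}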
// SPDX-License-Identifier: GPL-2.0-or-later
/* mpihelp-lshift.c - MPI helper functions
 * Copyright (C) 1994, 1996, 1998, 2001 Free Software Foundation, Inc.
 *
 * This file is part of GnuPG.
 *
 * Note: This code is heavily based on the GNU MP Library.
 *	 Actually it's the same code with only minor changes in the
 *	 way the data is stored; this is to support the abstraction
 *	 of an optional secure memory allocation which may be used
 *	 to avoid revealing of sensitive data due to paging etc.
 *	 The GNU MP Library itself is published under the LGPL;
 *	 however I decided to publish this code under the plain GPL.
 */

#include "mpi-internal.h"

/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
 * and store the USIZE least significant digits of the result at WP.
 * Return the bits shifted out from the most significant digit.
 *
 * Argument constraints:
 * 1. 0 < CNT < BITS_PER_MP_LIMB
 * 2. If the result is to be written over the input, WP must be >= UP.
 */
mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
			  unsigned int cnt)
{
	mpi_limb_t high_limb, low_limb;
	unsigned sh_1, sh_2;
	mpi_size_t i;
	mpi_limb_t retval;

	sh_1 = cnt;
	wp += 1;
	sh_2 = BITS_PER_MPI_LIMB - sh_1;
	i = usize - 1;
	low_limb = up[i];
	retval = low_limb >> sh_2;
	high_limb = low_limb;
	while (--i >= 0) {
		low_limb = up[i];
		wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
		high_limb = low_limb;
	}
	wp[i] = high_limb << sh_1;

	return retval;
}
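/*
 * Illustrative userspace sketch (not kernel code): the same limb-wise left
 * shift as mpihelp_lshift() above, using 32-bit limbs so the result is easy
 * to check by hand. Limbs are stored least significant first. Shifting the
 * two-limb value 0x00000001_80000000 left by 4 bits gives 0x00000018_00000000
 * and no bits are shifted out of the most significant limb.
 */
#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LIMB 32

/* Returns the bits shifted out of the most significant limb. */
static uint32_t limb_lshift(uint32_t *wp, const uint32_t *up, int usize,
			    unsigned int cnt)
{
	uint32_t high_limb, low_limb, retval;
	unsigned int sh_1 = cnt, sh_2 = BITS_PER_LIMB - cnt;
	int i = usize - 1;

	wp += 1;			/* same offset convention as above */
	low_limb = up[i];
	retval = low_limb >> sh_2;
	high_limb = low_limb;
	while (--i >= 0) {
		low_limb = up[i];
		wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
		high_limb = low_limb;
	}
	wp[i] = high_limb << sh_1;	/* i == -1: least significant limb */

	return retval;
}

int main(void)
{
	uint32_t u[2] = { 0x80000000u, 0x00000001u };
	uint32_t w[2];
	uint32_t out = limb_lshift(w, u, 2, 4);

	printf("result: 0x%08x_%08x, shifted out: 0x%x\n", w[1], w[0], out);
	return 0;
}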
// SPDX-License-Identifier: GPL-2.0
/* Driver for ETAS GmbH ES58X USB CAN(-FD) Bus Interfaces.
 *
 * File es58x_core.c: Core logic to manage the network devices and the
 * USB interface.
 *
 * Copyright (c) 2019 Robert Bosch Engineering and Business Solutions. All rights reserved.
 * Copyright (c) 2020 ETAS K.K.. All rights reserved.
* Copyright (c) 2020-2022 Vincent Mailhol <mailhol.vincent@wanadoo.fr> */ #include <linux/unaligned.h> #include <linux/crc16.h> #include <linux/ethtool.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/usb.h> #include <net/devlink.h> #include "es58x_core.h" MODULE_AUTHOR("Vincent Mailhol <mailhol.vincent@wanadoo.fr>"); MODULE_AUTHOR("Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>"); MODULE_DESCRIPTION("Socket CAN driver for ETAS ES58X USB adapters"); MODULE_LICENSE("GPL v2"); #define ES58X_VENDOR_ID 0x108C #define ES581_4_PRODUCT_ID 0x0159 #define ES582_1_PRODUCT_ID 0x0168 #define ES584_1_PRODUCT_ID 0x0169 /* ES58X FD has some interface protocols unsupported by this driver. */ #define ES58X_FD_INTERFACE_PROTOCOL 0 /* Table of devices which work with this driver. */ static const struct usb_device_id es58x_id_table[] = { { /* ETAS GmbH ES581.4 USB dual-channel CAN Bus Interface module. */ USB_DEVICE(ES58X_VENDOR_ID, ES581_4_PRODUCT_ID), .driver_info = ES58X_DUAL_CHANNEL }, { /* ETAS GmbH ES582.1 USB dual-channel CAN FD Bus Interface module. */ USB_DEVICE_INTERFACE_PROTOCOL(ES58X_VENDOR_ID, ES582_1_PRODUCT_ID, ES58X_FD_INTERFACE_PROTOCOL), .driver_info = ES58X_DUAL_CHANNEL | ES58X_FD_FAMILY }, { /* ETAS GmbH ES584.1 USB single-channel CAN FD Bus Interface module. */ USB_DEVICE_INTERFACE_PROTOCOL(ES58X_VENDOR_ID, ES584_1_PRODUCT_ID, ES58X_FD_INTERFACE_PROTOCOL), .driver_info = ES58X_FD_FAMILY }, { /* Terminating entry */ } }; MODULE_DEVICE_TABLE(usb, es58x_id_table); #define es58x_print_hex_dump(buf, len) \ print_hex_dump(KERN_DEBUG, \ KBUILD_MODNAME " " __stringify(buf) ": ", \ DUMP_PREFIX_NONE, 16, 1, buf, len, false) #define es58x_print_hex_dump_debug(buf, len) \ print_hex_dump_debug(KBUILD_MODNAME " " __stringify(buf) ": ",\ DUMP_PREFIX_NONE, 16, 1, buf, len, false) /* The last two bytes of an ES58X command is a CRC16. The first two * bytes (the start of frame) are skipped and the CRC calculation * starts on the third byte. */ #define ES58X_CRC_CALC_OFFSET sizeof_field(union es58x_urb_cmd, sof) /** * es58x_calculate_crc() - Compute the crc16 of a given URB. * @urb_cmd: The URB command for which we want to calculate the CRC. * @urb_len: Length of @urb_cmd. Must be at least bigger than 4 * (ES58X_CRC_CALC_OFFSET + sizeof(crc)) * * Return: crc16 value. */ static u16 es58x_calculate_crc(const union es58x_urb_cmd *urb_cmd, u16 urb_len) { u16 crc; ssize_t len = urb_len - ES58X_CRC_CALC_OFFSET - sizeof(crc); crc = crc16(0, &urb_cmd->raw_cmd[ES58X_CRC_CALC_OFFSET], len); return crc; } /** * es58x_get_crc() - Get the CRC value of a given URB. * @urb_cmd: The URB command for which we want to get the CRC. * @urb_len: Length of @urb_cmd. Must be at least bigger than 4 * (ES58X_CRC_CALC_OFFSET + sizeof(crc)) * * Return: crc16 value. */ static u16 es58x_get_crc(const union es58x_urb_cmd *urb_cmd, u16 urb_len) { u16 crc; const __le16 *crc_addr; crc_addr = (__le16 *)&urb_cmd->raw_cmd[urb_len - sizeof(crc)]; crc = get_unaligned_le16(crc_addr); return crc; } /** * es58x_set_crc() - Set the CRC value of a given URB. * @urb_cmd: The URB command for which we want to get the CRC. * @urb_len: Length of @urb_cmd. 
Must be at least bigger than 4 * (ES58X_CRC_CALC_OFFSET + sizeof(crc)) */ static void es58x_set_crc(union es58x_urb_cmd *urb_cmd, u16 urb_len) { u16 crc; __le16 *crc_addr; crc = es58x_calculate_crc(urb_cmd, urb_len); crc_addr = (__le16 *)&urb_cmd->raw_cmd[urb_len - sizeof(crc)]; put_unaligned_le16(crc, crc_addr); } /** * es58x_check_crc() - Validate the CRC value of a given URB. * @es58x_dev: ES58X device. * @urb_cmd: The URB command for which we want to check the CRC. * @urb_len: Length of @urb_cmd. Must be at least bigger than 4 * (ES58X_CRC_CALC_OFFSET + sizeof(crc)) * * Return: zero on success, -EBADMSG if the CRC check fails. */ static int es58x_check_crc(struct es58x_device *es58x_dev, const union es58x_urb_cmd *urb_cmd, u16 urb_len) { u16 calculated_crc = es58x_calculate_crc(urb_cmd, urb_len); u16 expected_crc = es58x_get_crc(urb_cmd, urb_len); if (expected_crc != calculated_crc) { dev_err_ratelimited(es58x_dev->dev, "%s: Bad CRC, urb_len: %d\n", __func__, urb_len); return -EBADMSG; } return 0; } /** * es58x_timestamp_to_ns() - Convert a timestamp value received from a * ES58X device to nanoseconds. * @timestamp: Timestamp received from a ES58X device. * * The timestamp received from ES58X is expressed in multiples of 0.5 * micro seconds. This function converts it in to nanoseconds. * * Return: Timestamp value in nanoseconds. */ static u64 es58x_timestamp_to_ns(u64 timestamp) { const u64 es58x_timestamp_ns_mult_coef = 500ULL; return es58x_timestamp_ns_mult_coef * timestamp; } /** * es58x_set_skb_timestamp() - Set the hardware timestamp of an skb. * @netdev: CAN network device. * @skb: socket buffer of a CAN message. * @timestamp: Timestamp received from an ES58X device. * * Used for both received and echo messages. */ static void es58x_set_skb_timestamp(struct net_device *netdev, struct sk_buff *skb, u64 timestamp) { struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev; struct skb_shared_hwtstamps *hwts; hwts = skb_hwtstamps(skb); /* Ignoring overflow (overflow on 64 bits timestamp with nano * second precision would occur after more than 500 years). */ hwts->hwtstamp = ns_to_ktime(es58x_timestamp_to_ns(timestamp) + es58x_dev->realtime_diff_ns); } /** * es58x_rx_timestamp() - Handle a received timestamp. * @es58x_dev: ES58X device. * @timestamp: Timestamp received from a ES58X device. * * Calculate the difference between the ES58X device and the kernel * internal clocks. This difference will be later used as an offset to * convert the timestamps of RX and echo messages to match the kernel * system time (e.g. convert to UNIX time). */ void es58x_rx_timestamp(struct es58x_device *es58x_dev, u64 timestamp) { u64 ktime_real_ns = ktime_get_real_ns(); u64 device_timestamp = es58x_timestamp_to_ns(timestamp); dev_dbg(es58x_dev->dev, "%s: request round-trip time: %llu ns\n", __func__, ktime_real_ns - es58x_dev->ktime_req_ns); es58x_dev->realtime_diff_ns = (es58x_dev->ktime_req_ns + ktime_real_ns) / 2 - device_timestamp; es58x_dev->ktime_req_ns = 0; dev_dbg(es58x_dev->dev, "%s: Device timestamp: %llu, diff with kernel: %llu\n", __func__, device_timestamp, es58x_dev->realtime_diff_ns); } /** * es58x_set_realtime_diff_ns() - Calculate difference between the * clocks of the ES58X device and the kernel * @es58x_dev: ES58X device. * * Request a timestamp from the ES58X device. Once the answer is * received, the timestamp difference will be set by the callback * function es58x_rx_timestamp(). * * Return: zero on success, errno when any error occurs. 
*/ static int es58x_set_realtime_diff_ns(struct es58x_device *es58x_dev) { if (es58x_dev->ktime_req_ns) { dev_warn(es58x_dev->dev, "%s: Previous request to set timestamp has not completed yet\n", __func__); return -EBUSY; } es58x_dev->ktime_req_ns = ktime_get_real_ns(); return es58x_dev->ops->get_timestamp(es58x_dev); } /** * es58x_is_can_state_active() - Is the network device in an active * CAN state? * @netdev: CAN network device. * * The device is considered active if it is able to send or receive * CAN frames, that is to say if it is in any of * CAN_STATE_ERROR_ACTIVE, CAN_STATE_ERROR_WARNING or * CAN_STATE_ERROR_PASSIVE states. * * Caution: when recovering from a bus-off, * net/core/dev.c#can_restart() will call * net/core/dev.c#can_flush_echo_skb() without using any kind of * locks. For this reason, it is critical to guarantee that no TX or * echo operations (i.e. any access to priv->echo_skb[]) can be done * while this function is returning false. * * Return: true if the device is active, else returns false. */ static bool es58x_is_can_state_active(struct net_device *netdev) { return es58x_priv(netdev)->can.state < CAN_STATE_BUS_OFF; } /** * es58x_is_echo_skb_threshold_reached() - Determine the limit of how * many skb slots can be taken before we should stop the network * queue. * @priv: ES58X private parameters related to the network device. * * We need to save enough free skb slots in order to be able to do * bulk send. This function can be used to determine when to wake or * stop the network queue in regard to the number of skb slots already * taken if the echo FIFO. * * Return: boolean. */ static bool es58x_is_echo_skb_threshold_reached(struct es58x_priv *priv) { u32 num_echo_skb = priv->tx_head - priv->tx_tail; u32 threshold = priv->can.echo_skb_max - priv->es58x_dev->param->tx_bulk_max + 1; return num_echo_skb >= threshold; } /** * es58x_can_free_echo_skb_tail() - Remove the oldest echo skb of the * echo FIFO. * @netdev: CAN network device. * * Naming convention: the tail is the beginning of the FIFO, i.e. the * first skb to have entered the FIFO. */ static void es58x_can_free_echo_skb_tail(struct net_device *netdev) { struct es58x_priv *priv = es58x_priv(netdev); u16 fifo_mask = priv->es58x_dev->param->fifo_mask; unsigned int frame_len = 0; can_free_echo_skb(netdev, priv->tx_tail & fifo_mask, &frame_len); netdev_completed_queue(netdev, 1, frame_len); priv->tx_tail++; netdev->stats.tx_dropped++; } /** * es58x_can_get_echo_skb_recovery() - Try to re-sync the echo FIFO. * @netdev: CAN network device. * @rcv_packet_idx: Index * * This function should not be called under normal circumstances. In * the unlikely case that one or several URB packages get dropped by * the device, the index will get out of sync. Try to recover by * dropping the echo skb packets with older indexes. * * Return: zero if recovery was successful, -EINVAL otherwise. */ static int es58x_can_get_echo_skb_recovery(struct net_device *netdev, u32 rcv_packet_idx) { struct es58x_priv *priv = es58x_priv(netdev); int ret = 0; netdev->stats.tx_errors++; if (net_ratelimit()) netdev_warn(netdev, "Bad echo packet index: %u. First index: %u, end index %u, num_echo_skb: %02u/%02u\n", rcv_packet_idx, priv->tx_tail, priv->tx_head, priv->tx_head - priv->tx_tail, priv->can.echo_skb_max); if ((s32)(rcv_packet_idx - priv->tx_tail) < 0) { if (net_ratelimit()) netdev_warn(netdev, "Received echo index is from the past. 
Ignoring it\n"); ret = -EINVAL; } else if ((s32)(rcv_packet_idx - priv->tx_head) >= 0) { if (net_ratelimit()) netdev_err(netdev, "Received echo index is from the future. Ignoring it\n"); ret = -EINVAL; } else { if (net_ratelimit()) netdev_warn(netdev, "Recovery: dropping %u echo skb from index %u to %u\n", rcv_packet_idx - priv->tx_tail, priv->tx_tail, rcv_packet_idx - 1); while (priv->tx_tail != rcv_packet_idx) { if (priv->tx_tail == priv->tx_head) return -EINVAL; es58x_can_free_echo_skb_tail(netdev); } } return ret; } /** * es58x_can_get_echo_skb() - Get the skb from the echo FIFO and loop * it back locally. * @netdev: CAN network device. * @rcv_packet_idx: Index of the first packet received from the device. * @tstamps: Array of hardware timestamps received from a ES58X device. * @pkts: Number of packets (and so, length of @tstamps). * * Callback function for when we receive a self reception * acknowledgment. Retrieves the skb from the echo FIFO, sets its * hardware timestamp (the actual time it was sent) and loops it back * locally. * * The device has to be active (i.e. network interface UP and not in * bus off state or restarting). * * Packet indexes must be consecutive (i.e. index of first packet is * @rcv_packet_idx, index of second packet is @rcv_packet_idx + 1 and * index of last packet is @rcv_packet_idx + @pkts - 1). * * Return: zero on success. */ int es58x_can_get_echo_skb(struct net_device *netdev, u32 rcv_packet_idx, u64 *tstamps, unsigned int pkts) { struct es58x_priv *priv = es58x_priv(netdev); unsigned int rx_total_frame_len = 0; unsigned int num_echo_skb = priv->tx_head - priv->tx_tail; int i; u16 fifo_mask = priv->es58x_dev->param->fifo_mask; if (!netif_running(netdev)) { if (net_ratelimit()) netdev_info(netdev, "%s: %s is down, dropping %d echo packets\n", __func__, netdev->name, pkts); netdev->stats.tx_dropped += pkts; return 0; } else if (!es58x_is_can_state_active(netdev)) { if (net_ratelimit()) netdev_dbg(netdev, "Bus is off or device is restarting. Ignoring %u echo packets from index %u\n", pkts, rcv_packet_idx); /* stats.tx_dropped will be (or was already) * incremented by * drivers/net/can/net/dev.c:can_flush_echo_skb(). */ return 0; } else if (num_echo_skb == 0) { if (net_ratelimit()) netdev_warn(netdev, "Received %u echo packets from index: %u but echo skb queue is empty.\n", pkts, rcv_packet_idx); netdev->stats.tx_dropped += pkts; return 0; } if (priv->tx_tail != rcv_packet_idx) { if (es58x_can_get_echo_skb_recovery(netdev, rcv_packet_idx) < 0) { if (net_ratelimit()) netdev_warn(netdev, "Could not find echo skb for echo packet index: %u\n", rcv_packet_idx); return 0; } } if (num_echo_skb < pkts) { int pkts_drop = pkts - num_echo_skb; if (net_ratelimit()) netdev_err(netdev, "Received %u echo packets but have only %d echo skb. Dropping %d echo skb\n", pkts, num_echo_skb, pkts_drop); netdev->stats.tx_dropped += pkts_drop; pkts -= pkts_drop; } for (i = 0; i < pkts; i++) { unsigned int skb_idx = priv->tx_tail & fifo_mask; struct sk_buff *skb = priv->can.echo_skb[skb_idx]; unsigned int frame_len = 0; if (skb) es58x_set_skb_timestamp(netdev, skb, tstamps[i]); netdev->stats.tx_bytes += can_get_echo_skb(netdev, skb_idx, &frame_len); rx_total_frame_len += frame_len; priv->tx_tail++; } netdev_completed_queue(netdev, pkts, rx_total_frame_len); netdev->stats.tx_packets += pkts; priv->err_passive_before_rtx_success = 0; if (!es58x_is_echo_skb_threshold_reached(priv)) netif_wake_queue(netdev); return 0; } /** * es58x_can_reset_echo_fifo() - Reset the echo FIFO. 
* @netdev: CAN network device. * * The echo_skb array of struct can_priv will be flushed by * drivers/net/can/dev.c:can_flush_echo_skb(). This function resets * the parameters of the struct es58x_priv of our device and reset the * queue (c.f. BQL). */ static void es58x_can_reset_echo_fifo(struct net_device *netdev) { struct es58x_priv *priv = es58x_priv(netdev); priv->tx_tail = 0; priv->tx_head = 0; priv->tx_urb = NULL; priv->err_passive_before_rtx_success = 0; netdev_reset_queue(netdev); } /** * es58x_flush_pending_tx_msg() - Reset the buffer for transmission messages. * @netdev: CAN network device. * * es58x_start_xmit() will queue up to tx_bulk_max messages in * &tx_urb buffer and do a bulk send of all messages in one single URB * (c.f. xmit_more flag). When the device recovers from a bus off * state or when the device stops, the tx_urb buffer might still have * pending messages in it and thus need to be flushed. */ static void es58x_flush_pending_tx_msg(struct net_device *netdev) { struct es58x_priv *priv = es58x_priv(netdev); struct es58x_device *es58x_dev = priv->es58x_dev; if (priv->tx_urb) { netdev_warn(netdev, "%s: dropping %d TX messages\n", __func__, priv->tx_can_msg_cnt); netdev->stats.tx_dropped += priv->tx_can_msg_cnt; while (priv->tx_can_msg_cnt > 0) { unsigned int frame_len = 0; u16 fifo_mask = priv->es58x_dev->param->fifo_mask; priv->tx_head--; priv->tx_can_msg_cnt--; can_free_echo_skb(netdev, priv->tx_head & fifo_mask, &frame_len); netdev_completed_queue(netdev, 1, frame_len); } usb_anchor_urb(priv->tx_urb, &priv->es58x_dev->tx_urbs_idle); atomic_inc(&es58x_dev->tx_urbs_idle_cnt); usb_free_urb(priv->tx_urb); } priv->tx_urb = NULL; } /** * es58x_tx_ack_msg() - Handle acknowledgment messages. * @netdev: CAN network device. * @tx_free_entries: Number of free entries in the device transmit FIFO. * @rx_cmd_ret_u32: error code as returned by the ES58X device. * * ES58X sends an acknowledgment message after a transmission request * is done. This is mandatory for the ES581.4 but is optional (and * deactivated in this driver) for the ES58X_FD family. * * Under normal circumstances, this function should never throw an * error message. * * Return: zero on success, errno when any error occurs. */ int es58x_tx_ack_msg(struct net_device *netdev, u16 tx_free_entries, enum es58x_ret_u32 rx_cmd_ret_u32) { struct es58x_priv *priv = es58x_priv(netdev); if (tx_free_entries <= priv->es58x_dev->param->tx_bulk_max) { if (net_ratelimit()) netdev_err(netdev, "Only %d entries left in device queue, num_echo_skb: %d/%d\n", tx_free_entries, priv->tx_head - priv->tx_tail, priv->can.echo_skb_max); netif_stop_queue(netdev); } return es58x_rx_cmd_ret_u32(netdev, ES58X_RET_TYPE_TX_MSG, rx_cmd_ret_u32); } /** * es58x_rx_can_msg() - Handle a received a CAN message. * @netdev: CAN network device. * @timestamp: Hardware time stamp (only relevant in rx branches). * @data: CAN payload. * @can_id: CAN ID. * @es58x_flags: Please refer to enum es58x_flag. * @dlc: Data Length Code (raw value). * * Fill up a CAN skb and post it. * * This function handles the case where the DLC of a classical CAN * frame is greater than CAN_MAX_DLEN (c.f. the len8_dlc field of * struct can_frame). * * Return: zero on success. 
*/ int es58x_rx_can_msg(struct net_device *netdev, u64 timestamp, const u8 *data, canid_t can_id, enum es58x_flag es58x_flags, u8 dlc) { struct canfd_frame *cfd; struct can_frame *ccf; struct sk_buff *skb; u8 len; bool is_can_fd = !!(es58x_flags & ES58X_FLAG_FD_DATA); if (dlc > CAN_MAX_RAW_DLC) { netdev_err(netdev, "%s: DLC is %d but maximum should be %d\n", __func__, dlc, CAN_MAX_RAW_DLC); return -EMSGSIZE; } if (is_can_fd) { len = can_fd_dlc2len(dlc); skb = alloc_canfd_skb(netdev, &cfd); } else { len = can_cc_dlc2len(dlc); skb = alloc_can_skb(netdev, &ccf); cfd = (struct canfd_frame *)ccf; } if (!skb) { netdev->stats.rx_dropped++; return 0; } cfd->can_id = can_id; if (es58x_flags & ES58X_FLAG_EFF) cfd->can_id |= CAN_EFF_FLAG; if (is_can_fd) { cfd->len = len; if (es58x_flags & ES58X_FLAG_FD_BRS) cfd->flags |= CANFD_BRS; if (es58x_flags & ES58X_FLAG_FD_ESI) cfd->flags |= CANFD_ESI; } else { can_frame_set_cc_len(ccf, dlc, es58x_priv(netdev)->can.ctrlmode); if (es58x_flags & ES58X_FLAG_RTR) { ccf->can_id |= CAN_RTR_FLAG; len = 0; } } memcpy(cfd->data, data, len); netdev->stats.rx_packets++; netdev->stats.rx_bytes += len; es58x_set_skb_timestamp(netdev, skb, timestamp); netif_rx(skb); es58x_priv(netdev)->err_passive_before_rtx_success = 0; return 0; } /** * es58x_rx_err_msg() - Handle a received CAN event or error message. * @netdev: CAN network device. * @error: Error code. * @event: Event code. * @timestamp: Timestamp received from a ES58X device. * * Handle the errors and events received by the ES58X device, create * a CAN error skb and post it. * * In some rare cases the devices might get stuck alternating between * CAN_STATE_ERROR_PASSIVE and CAN_STATE_ERROR_WARNING. To prevent * this behavior, we force a bus off state if the device goes in * CAN_STATE_ERROR_WARNING for ES58X_MAX_CONSECUTIVE_WARN consecutive * times with no successful transmission or reception in between. * * Once the device is in bus off state, the only way to restart it is * through the drivers/net/can/dev.c:can_restart() function. The * device is technically capable to recover by itself under certain * circumstances, however, allowing self recovery would create * complex race conditions with drivers/net/can/dev.c:can_restart() * and thus was not implemented. To activate automatic restart, please * set the restart-ms parameter (e.g. ip link set can0 type can * restart-ms 100). * * If the bus is really instable, this function would try to send a * lot of log messages. Those are rate limited (i.e. you will see * messages such as "net_ratelimit: XXX callbacks suppressed" in * dmesg). * * Return: zero on success, errno when any error occurs. 
*/ int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error, enum es58x_event event, u64 timestamp) { struct es58x_priv *priv = es58x_priv(netdev); struct can_priv *can = netdev_priv(netdev); struct can_device_stats *can_stats = &can->can_stats; struct can_frame *cf = NULL; struct sk_buff *skb; int ret = 0; if (!netif_running(netdev)) { if (net_ratelimit()) netdev_info(netdev, "%s: %s is down, dropping packet\n", __func__, netdev->name); netdev->stats.rx_dropped++; return 0; } if (error == ES58X_ERR_OK && event == ES58X_EVENT_OK) { netdev_err(netdev, "%s: Both error and event are zero\n", __func__); return -EINVAL; } skb = alloc_can_err_skb(netdev, &cf); switch (error) { case ES58X_ERR_OK: /* 0: No error */ break; case ES58X_ERR_PROT_STUFF: if (net_ratelimit()) netdev_dbg(netdev, "Error BITSTUFF\n"); if (cf) cf->data[2] |= CAN_ERR_PROT_STUFF; break; case ES58X_ERR_PROT_FORM: if (net_ratelimit()) netdev_dbg(netdev, "Error FORMAT\n"); if (cf) cf->data[2] |= CAN_ERR_PROT_FORM; break; case ES58X_ERR_ACK: if (net_ratelimit()) netdev_dbg(netdev, "Error ACK\n"); if (cf) cf->can_id |= CAN_ERR_ACK; break; case ES58X_ERR_PROT_BIT: if (net_ratelimit()) netdev_dbg(netdev, "Error BIT\n"); if (cf) cf->data[2] |= CAN_ERR_PROT_BIT; break; case ES58X_ERR_PROT_CRC: if (net_ratelimit()) netdev_dbg(netdev, "Error CRC\n"); if (cf) cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ; break; case ES58X_ERR_PROT_BIT1: if (net_ratelimit()) netdev_dbg(netdev, "Error: expected a recessive bit but monitored a dominant one\n"); if (cf) cf->data[2] |= CAN_ERR_PROT_BIT1; break; case ES58X_ERR_PROT_BIT0: if (net_ratelimit()) netdev_dbg(netdev, "Error expected a dominant bit but monitored a recessive one\n"); if (cf) cf->data[2] |= CAN_ERR_PROT_BIT0; break; case ES58X_ERR_PROT_OVERLOAD: if (net_ratelimit()) netdev_dbg(netdev, "Error OVERLOAD\n"); if (cf) cf->data[2] |= CAN_ERR_PROT_OVERLOAD; break; case ES58X_ERR_PROT_UNSPEC: if (net_ratelimit()) netdev_dbg(netdev, "Unspecified error\n"); if (cf) cf->can_id |= CAN_ERR_PROT; break; default: if (net_ratelimit()) netdev_err(netdev, "%s: Unspecified error code 0x%04X\n", __func__, (int)error); if (cf) cf->can_id |= CAN_ERR_PROT; break; } switch (event) { case ES58X_EVENT_OK: /* 0: No event */ break; case ES58X_EVENT_CRTL_ACTIVE: if (can->state == CAN_STATE_BUS_OFF) { netdev_err(netdev, "%s: state transition: BUS OFF -> ACTIVE\n", __func__); } if (net_ratelimit()) netdev_dbg(netdev, "Event CAN BUS ACTIVE\n"); if (cf) cf->data[1] |= CAN_ERR_CRTL_ACTIVE; can->state = CAN_STATE_ERROR_ACTIVE; break; case ES58X_EVENT_CRTL_PASSIVE: if (net_ratelimit()) netdev_dbg(netdev, "Event CAN BUS PASSIVE\n"); /* Either TX or RX error count reached passive state * but we do not know which. Setting both flags by * default. */ if (cf) { cf->data[1] |= CAN_ERR_CRTL_RX_PASSIVE; cf->data[1] |= CAN_ERR_CRTL_TX_PASSIVE; } if (can->state < CAN_STATE_BUS_OFF) can->state = CAN_STATE_ERROR_PASSIVE; can_stats->error_passive++; if (priv->err_passive_before_rtx_success < U8_MAX) priv->err_passive_before_rtx_success++; break; case ES58X_EVENT_CRTL_WARNING: if (net_ratelimit()) netdev_dbg(netdev, "Event CAN BUS WARNING\n"); /* Either TX or RX error count reached warning state * but we do not know which. Setting both flags by * default. 
*/ if (cf) { cf->data[1] |= CAN_ERR_CRTL_RX_WARNING; cf->data[1] |= CAN_ERR_CRTL_TX_WARNING; } if (can->state < CAN_STATE_BUS_OFF) can->state = CAN_STATE_ERROR_WARNING; can_stats->error_warning++; break; case ES58X_EVENT_BUSOFF: if (net_ratelimit()) netdev_dbg(netdev, "Event CAN BUS OFF\n"); if (cf) cf->can_id |= CAN_ERR_BUSOFF; can_stats->bus_off++; netif_stop_queue(netdev); if (can->state != CAN_STATE_BUS_OFF) { can->state = CAN_STATE_BUS_OFF; can_bus_off(netdev); ret = can->do_set_mode(netdev, CAN_MODE_STOP); } break; case ES58X_EVENT_SINGLE_WIRE: if (net_ratelimit()) netdev_warn(netdev, "Lost connection on either CAN high or CAN low\n"); /* Lost connection on either CAN high or CAN * low. Setting both flags by default. */ if (cf) { cf->data[4] |= CAN_ERR_TRX_CANH_NO_WIRE; cf->data[4] |= CAN_ERR_TRX_CANL_NO_WIRE; } break; default: if (net_ratelimit()) netdev_err(netdev, "%s: Unspecified event code 0x%04X\n", __func__, (int)event); if (cf) cf->can_id |= CAN_ERR_CRTL; break; } if (cf) { if (cf->data[1]) cf->can_id |= CAN_ERR_CRTL; if (cf->data[2] || cf->data[3]) { cf->can_id |= CAN_ERR_PROT; can_stats->bus_error++; } if (cf->data[4]) cf->can_id |= CAN_ERR_TRX; es58x_set_skb_timestamp(netdev, skb, timestamp); netif_rx(skb); } if ((event & ES58X_EVENT_CRTL_PASSIVE) && priv->err_passive_before_rtx_success == ES58X_CONSECUTIVE_ERR_PASSIVE_MAX) { netdev_info(netdev, "Got %d consecutive warning events with no successful RX or TX. Forcing bus-off\n", priv->err_passive_before_rtx_success); return es58x_rx_err_msg(netdev, ES58X_ERR_OK, ES58X_EVENT_BUSOFF, timestamp); } return ret; } /** * es58x_cmd_ret_desc() - Convert a command type to a string. * @cmd_ret_type: Type of the command which triggered the return code. * * The final line (return "<unknown>") should not be reached. If this * is the case, there is an implementation bug. * * Return: a readable description of the @cmd_ret_type. */ static const char *es58x_cmd_ret_desc(enum es58x_ret_type cmd_ret_type) { switch (cmd_ret_type) { case ES58X_RET_TYPE_SET_BITTIMING: return "Set bittiming"; case ES58X_RET_TYPE_ENABLE_CHANNEL: return "Enable channel"; case ES58X_RET_TYPE_DISABLE_CHANNEL: return "Disable channel"; case ES58X_RET_TYPE_TX_MSG: return "Transmit message"; case ES58X_RET_TYPE_RESET_RX: return "Reset RX"; case ES58X_RET_TYPE_RESET_TX: return "Reset TX"; case ES58X_RET_TYPE_DEVICE_ERR: return "Device error"; } return "<unknown>"; }; /** * es58x_rx_cmd_ret_u8() - Handle the command's return code received * from the ES58X device. * @dev: Device, only used for the dev_XXX() print functions. * @cmd_ret_type: Type of the command which triggered the return code. * @rx_cmd_ret_u8: Command error code as returned by the ES58X device. * * Handles the 8 bits command return code. Those are specific to the * ES581.4 device. The return value will eventually be used by * es58x_handle_urb_cmd() function which will take proper actions in * case of critical issues such and memory errors or bad CRC values. * * In contrast with es58x_rx_cmd_ret_u32(), the network device is * unknown. * * Return: zero on success, return errno when any error occurs. 
*/ int es58x_rx_cmd_ret_u8(struct device *dev, enum es58x_ret_type cmd_ret_type, enum es58x_ret_u8 rx_cmd_ret_u8) { const char *ret_desc = es58x_cmd_ret_desc(cmd_ret_type); switch (rx_cmd_ret_u8) { case ES58X_RET_U8_OK: dev_dbg_ratelimited(dev, "%s: OK\n", ret_desc); return 0; case ES58X_RET_U8_ERR_UNSPECIFIED_FAILURE: dev_err(dev, "%s: unspecified failure\n", ret_desc); return -EBADMSG; case ES58X_RET_U8_ERR_NO_MEM: dev_err(dev, "%s: device ran out of memory\n", ret_desc); return -ENOMEM; case ES58X_RET_U8_ERR_BAD_CRC: dev_err(dev, "%s: CRC of previous command is incorrect\n", ret_desc); return -EIO; default: dev_err(dev, "%s: returned unknown value: 0x%02X\n", ret_desc, rx_cmd_ret_u8); return -EBADMSG; } } /** * es58x_rx_cmd_ret_u32() - Handle the command return code received * from the ES58X device. * @netdev: CAN network device. * @cmd_ret_type: Type of the command which triggered the return code. * @rx_cmd_ret_u32: error code as returned by the ES58X device. * * Handles the 32 bits command return code. The return value will * eventually be used by es58x_handle_urb_cmd() function which will * take proper actions in case of critical issues such and memory * errors or bad CRC values. * * Return: zero on success, errno when any error occurs. */ int es58x_rx_cmd_ret_u32(struct net_device *netdev, enum es58x_ret_type cmd_ret_type, enum es58x_ret_u32 rx_cmd_ret_u32) { struct es58x_priv *priv = es58x_priv(netdev); const struct es58x_operators *ops = priv->es58x_dev->ops; const char *ret_desc = es58x_cmd_ret_desc(cmd_ret_type); switch (rx_cmd_ret_u32) { case ES58X_RET_U32_OK: switch (cmd_ret_type) { case ES58X_RET_TYPE_ENABLE_CHANNEL: es58x_can_reset_echo_fifo(netdev); priv->can.state = CAN_STATE_ERROR_ACTIVE; netif_wake_queue(netdev); netdev_info(netdev, "%s: %s (Serial Number %s): CAN%d channel becomes ready\n", ret_desc, priv->es58x_dev->udev->product, priv->es58x_dev->udev->serial, priv->channel_idx + 1); break; case ES58X_RET_TYPE_TX_MSG: if (IS_ENABLED(CONFIG_VERBOSE_DEBUG) && net_ratelimit()) netdev_vdbg(netdev, "%s: OK\n", ret_desc); break; default: netdev_dbg(netdev, "%s: OK\n", ret_desc); break; } return 0; case ES58X_RET_U32_ERR_UNSPECIFIED_FAILURE: if (cmd_ret_type == ES58X_RET_TYPE_ENABLE_CHANNEL) { int ret; netdev_warn(netdev, "%s: channel is already opened, closing and re-opening it to reflect new configuration\n", ret_desc); ret = ops->disable_channel(es58x_priv(netdev)); if (ret) return ret; return ops->enable_channel(es58x_priv(netdev)); } if (cmd_ret_type == ES58X_RET_TYPE_DISABLE_CHANNEL) { netdev_info(netdev, "%s: channel is already closed\n", ret_desc); return 0; } netdev_err(netdev, "%s: unspecified failure\n", ret_desc); return -EBADMSG; case ES58X_RET_U32_ERR_NO_MEM: netdev_err(netdev, "%s: device ran out of memory\n", ret_desc); return -ENOMEM; case ES58X_RET_U32_WARN_PARAM_ADJUSTED: netdev_warn(netdev, "%s: some incompatible parameters have been adjusted\n", ret_desc); return 0; case ES58X_RET_U32_WARN_TX_MAYBE_REORDER: netdev_warn(netdev, "%s: TX messages might have been reordered\n", ret_desc); return 0; case ES58X_RET_U32_ERR_TIMEDOUT: netdev_err(netdev, "%s: command timed out\n", ret_desc); return -ETIMEDOUT; case ES58X_RET_U32_ERR_FIFO_FULL: netdev_warn(netdev, "%s: fifo is full\n", ret_desc); return 0; case ES58X_RET_U32_ERR_BAD_CONFIG: netdev_err(netdev, "%s: bad configuration\n", ret_desc); return -EINVAL; case ES58X_RET_U32_ERR_NO_RESOURCE: netdev_err(netdev, "%s: no resource available\n", ret_desc); return -EBUSY; default: netdev_err(netdev, "%s returned 
unknown value: 0x%08X\n", ret_desc, rx_cmd_ret_u32); return -EBADMSG; } } /** * es58x_increment_rx_errors() - Increment the network devices' error * count. * @es58x_dev: ES58X device. * * If an error occurs on the early stages on receiving an URB command, * we might not be able to figure out on which network device the * error occurred. In such case, we arbitrarily increment the error * count of all the network devices attached to our ES58X device. */ static void es58x_increment_rx_errors(struct es58x_device *es58x_dev) { int i; for (i = 0; i < es58x_dev->num_can_ch; i++) if (es58x_dev->netdev[i]) es58x_dev->netdev[i]->stats.rx_errors++; } /** * es58x_handle_urb_cmd() - Handle the URB command * @es58x_dev: ES58X device. * @urb_cmd: The URB command received from the ES58X device, might not * be aligned. * * Sends the URB command to the device specific function. Manages the * errors thrown back by those functions. */ static void es58x_handle_urb_cmd(struct es58x_device *es58x_dev, const union es58x_urb_cmd *urb_cmd) { const struct es58x_operators *ops = es58x_dev->ops; size_t cmd_len; int i, ret; ret = ops->handle_urb_cmd(es58x_dev, urb_cmd); switch (ret) { case 0: /* OK */ return; case -ENODEV: dev_err_ratelimited(es58x_dev->dev, "Device is not ready\n"); break; case -EINVAL: case -EMSGSIZE: case -EBADRQC: case -EBADMSG: case -ECHRNG: case -ETIMEDOUT: cmd_len = es58x_get_urb_cmd_len(es58x_dev, ops->get_msg_len(urb_cmd)); dev_err(es58x_dev->dev, "ops->handle_urb_cmd() returned error %pe", ERR_PTR(ret)); es58x_print_hex_dump(urb_cmd, cmd_len); break; case -EFAULT: case -ENOMEM: case -EIO: default: dev_crit(es58x_dev->dev, "ops->handle_urb_cmd() returned error %pe, detaching all network devices\n", ERR_PTR(ret)); for (i = 0; i < es58x_dev->num_can_ch; i++) if (es58x_dev->netdev[i]) netif_device_detach(es58x_dev->netdev[i]); if (es58x_dev->ops->reset_device) es58x_dev->ops->reset_device(es58x_dev); break; } /* Because the urb command could not fully be parsed, * channel_id is not confirmed. Incrementing rx_errors count * of all channels. */ es58x_increment_rx_errors(es58x_dev); } /** * es58x_check_rx_urb() - Check the length and format of the URB command. * @es58x_dev: ES58X device. * @urb_cmd: The URB command received from the ES58X device, might not * be aligned. * @urb_actual_len: The actual length of the URB command. * * Check if the first message of the received urb is valid, that is to * say that both the header and the length are coherent. * * Return: * the length of the first message of the URB on success. * * -ENODATA if the URB command is incomplete (in which case, the URB * command should be buffered and combined with the next URB to try to * reconstitute the URB command). * * -EOVERFLOW if the length is bigger than the maximum expected one. * * -EBADRQC if the start of frame does not match the expected value. 
*/ static signed int es58x_check_rx_urb(struct es58x_device *es58x_dev, const union es58x_urb_cmd *urb_cmd, u32 urb_actual_len) { const struct device *dev = es58x_dev->dev; const struct es58x_parameters *param = es58x_dev->param; u16 sof, msg_len; signed int urb_cmd_len, ret; if (urb_actual_len < param->urb_cmd_header_len) { dev_vdbg(dev, "%s: Received %d bytes [%*ph]: header incomplete\n", __func__, urb_actual_len, urb_actual_len, urb_cmd->raw_cmd); return -ENODATA; } sof = get_unaligned_le16(&urb_cmd->sof); if (sof != param->rx_start_of_frame) { dev_err_ratelimited(es58x_dev->dev, "%s: Expected sequence 0x%04X for start of frame but got 0x%04X.\n", __func__, param->rx_start_of_frame, sof); return -EBADRQC; } msg_len = es58x_dev->ops->get_msg_len(urb_cmd); urb_cmd_len = es58x_get_urb_cmd_len(es58x_dev, msg_len); if (urb_cmd_len > param->rx_urb_cmd_max_len) { dev_err_ratelimited(es58x_dev->dev, "%s: Biggest expected size for rx urb_cmd is %u but receive a command of size %d\n", __func__, param->rx_urb_cmd_max_len, urb_cmd_len); return -EOVERFLOW; } else if (urb_actual_len < urb_cmd_len) { dev_vdbg(dev, "%s: Received %02d/%02d bytes\n", __func__, urb_actual_len, urb_cmd_len); return -ENODATA; } ret = es58x_check_crc(es58x_dev, urb_cmd, urb_cmd_len); if (ret) return ret; return urb_cmd_len; } /** * es58x_copy_to_cmd_buf() - Copy an array to the URB command buffer. * @es58x_dev: ES58X device. * @raw_cmd: the buffer we want to copy. * @raw_cmd_len: length of @raw_cmd. * * Concatenates @raw_cmd_len bytes of @raw_cmd to the end of the URB * command buffer. * * Return: zero on success, -EMSGSIZE if not enough space is available * to do the copy. */ static int es58x_copy_to_cmd_buf(struct es58x_device *es58x_dev, u8 *raw_cmd, int raw_cmd_len) { if (es58x_dev->rx_cmd_buf_len + raw_cmd_len > es58x_dev->param->rx_urb_cmd_max_len) return -EMSGSIZE; memcpy(&es58x_dev->rx_cmd_buf.raw_cmd[es58x_dev->rx_cmd_buf_len], raw_cmd, raw_cmd_len); es58x_dev->rx_cmd_buf_len += raw_cmd_len; return 0; } /** * es58x_split_urb_try_recovery() - Try to recover bad URB sequences. * @es58x_dev: ES58X device. * @raw_cmd: pointer to the buffer we want to copy. * @raw_cmd_len: length of @raw_cmd. * * Under some rare conditions, we might get incorrect URBs from the * device. From our observations, one of the valid URB gets replaced * by one from the past. The full root cause is not identified. * * This function looks for the next start of frame in the urb buffer * in order to try to recover. * * Such behavior was not observed on the devices of the ES58X FD * family and only seems to impact the ES581.4. * * Return: the number of bytes dropped on success, -EBADMSG if recovery failed. */ static int es58x_split_urb_try_recovery(struct es58x_device *es58x_dev, u8 *raw_cmd, size_t raw_cmd_len) { union es58x_urb_cmd *urb_cmd; signed int urb_cmd_len; u16 sof; int dropped_bytes = 0; es58x_increment_rx_errors(es58x_dev); while (raw_cmd_len > sizeof(sof)) { urb_cmd = (union es58x_urb_cmd *)raw_cmd; sof = get_unaligned_le16(&urb_cmd->sof); if (sof == es58x_dev->param->rx_start_of_frame) { urb_cmd_len = es58x_check_rx_urb(es58x_dev, urb_cmd, raw_cmd_len); if ((urb_cmd_len == -ENODATA) || urb_cmd_len > 0) { dev_info_ratelimited(es58x_dev->dev, "Recovery successful! 
Dropped %d bytes (urb_cmd_len: %d)\n", dropped_bytes, urb_cmd_len); return dropped_bytes; } } raw_cmd++; raw_cmd_len--; dropped_bytes++; } dev_warn_ratelimited(es58x_dev->dev, "%s: Recovery failed\n", __func__); return -EBADMSG; } /** * es58x_handle_incomplete_cmd() - Reconstitute an URB command from * different URB pieces. * @es58x_dev: ES58X device. * @urb: last urb buffer received. * * The device might split the URB commands in an arbitrary amount of * pieces. This function concatenates those in an URB buffer until a * full URB command is reconstituted and consume it. * * Return: * number of bytes consumed from @urb if successful. * * -ENODATA if the URB command is still incomplete. * * -EBADMSG if the URB command is incorrect. */ static signed int es58x_handle_incomplete_cmd(struct es58x_device *es58x_dev, struct urb *urb) { size_t cpy_len; signed int urb_cmd_len, tmp_cmd_buf_len, ret; tmp_cmd_buf_len = es58x_dev->rx_cmd_buf_len; cpy_len = min_t(int, es58x_dev->param->rx_urb_cmd_max_len - es58x_dev->rx_cmd_buf_len, urb->actual_length); ret = es58x_copy_to_cmd_buf(es58x_dev, urb->transfer_buffer, cpy_len); if (ret < 0) return ret; urb_cmd_len = es58x_check_rx_urb(es58x_dev, &es58x_dev->rx_cmd_buf, es58x_dev->rx_cmd_buf_len); if (urb_cmd_len == -ENODATA) { return -ENODATA; } else if (urb_cmd_len < 0) { dev_err_ratelimited(es58x_dev->dev, "Could not reconstitute incomplete command from previous URB, dropping %d bytes\n", tmp_cmd_buf_len + urb->actual_length); dev_err_ratelimited(es58x_dev->dev, "Error code: %pe, es58x_dev->rx_cmd_buf_len: %d, urb->actual_length: %u\n", ERR_PTR(urb_cmd_len), tmp_cmd_buf_len, urb->actual_length); es58x_print_hex_dump(&es58x_dev->rx_cmd_buf, tmp_cmd_buf_len); es58x_print_hex_dump(urb->transfer_buffer, urb->actual_length); return urb->actual_length; } es58x_handle_urb_cmd(es58x_dev, &es58x_dev->rx_cmd_buf); return urb_cmd_len - tmp_cmd_buf_len; /* consumed length */ } /** * es58x_split_urb() - Cut the received URB in individual URB commands. * @es58x_dev: ES58X device. * @urb: last urb buffer received. * * The device might send urb in bulk format (i.e. several URB commands * concatenated together). This function will split all the commands * contained in the urb. * * Return: * number of bytes consumed from @urb if successful. * * -ENODATA if the URB command is incomplete. * * -EBADMSG if the URB command is incorrect. */ static signed int es58x_split_urb(struct es58x_device *es58x_dev, struct urb *urb) { union es58x_urb_cmd *urb_cmd; u8 *raw_cmd = urb->transfer_buffer; s32 raw_cmd_len = urb->actual_length; int ret; if (es58x_dev->rx_cmd_buf_len != 0) { ret = es58x_handle_incomplete_cmd(es58x_dev, urb); if (ret != -ENODATA) es58x_dev->rx_cmd_buf_len = 0; if (ret < 0) return ret; raw_cmd += ret; raw_cmd_len -= ret; } while (raw_cmd_len > 0) { if (raw_cmd[0] == ES58X_HEARTBEAT) { raw_cmd++; raw_cmd_len--; continue; } urb_cmd = (union es58x_urb_cmd *)raw_cmd; ret = es58x_check_rx_urb(es58x_dev, urb_cmd, raw_cmd_len); if (ret > 0) { es58x_handle_urb_cmd(es58x_dev, urb_cmd); } else if (ret == -ENODATA) { es58x_copy_to_cmd_buf(es58x_dev, raw_cmd, raw_cmd_len); return -ENODATA; } else if (ret < 0) { ret = es58x_split_urb_try_recovery(es58x_dev, raw_cmd, raw_cmd_len); if (ret < 0) return ret; } raw_cmd += ret; raw_cmd_len -= ret; } return 0; } /** * es58x_read_bulk_callback() - Callback for reading data from device. * @urb: last urb buffer received. * * This function gets eventually called each time an URB is received * from the ES58X device. 
* * Checks urb status, calls read function and resubmits urb read * operation. */ static void es58x_read_bulk_callback(struct urb *urb) { struct es58x_device *es58x_dev = urb->context; const struct device *dev = es58x_dev->dev; int i, ret; switch (urb->status) { case 0: /* success */ break; case -EOVERFLOW: dev_err_ratelimited(dev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); es58x_print_hex_dump_debug(urb->transfer_buffer, urb->transfer_buffer_length); goto resubmit_urb; case -EPROTO: dev_warn_ratelimited(dev, "%s: error %pe. Device unplugged?\n", __func__, ERR_PTR(urb->status)); goto free_urb; case -ENOENT: case -EPIPE: dev_err_ratelimited(dev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); goto free_urb; case -ESHUTDOWN: dev_dbg_ratelimited(dev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); goto free_urb; default: dev_err_ratelimited(dev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); goto resubmit_urb; } ret = es58x_split_urb(es58x_dev, urb); if ((ret != -ENODATA) && ret < 0) { dev_err(es58x_dev->dev, "es58x_split_urb() returned error %pe", ERR_PTR(ret)); es58x_print_hex_dump_debug(urb->transfer_buffer, urb->actual_length); /* Because the urb command could not be parsed, * channel_id is not confirmed. Incrementing rx_errors * count of all channels. */ es58x_increment_rx_errors(es58x_dev); } resubmit_urb: ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret == -ENODEV) { for (i = 0; i < es58x_dev->num_can_ch; i++) if (es58x_dev->netdev[i]) netif_device_detach(es58x_dev->netdev[i]); } else if (ret) dev_err_ratelimited(dev, "Failed resubmitting read bulk urb: %pe\n", ERR_PTR(ret)); return; free_urb: usb_free_coherent(urb->dev, urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); } /** * es58x_write_bulk_callback() - Callback after writing data to the device. * @urb: urb buffer which was previously submitted. * * This function gets eventually called each time an URB was sent to * the ES58X device. * * Puts the @urb back to the urbs idle anchor and tries to restart the * network queue. */ static void es58x_write_bulk_callback(struct urb *urb) { struct net_device *netdev = urb->context; struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev; switch (urb->status) { case 0: /* success */ break; case -EOVERFLOW: if (net_ratelimit()) netdev_err(netdev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); es58x_print_hex_dump(urb->transfer_buffer, urb->transfer_buffer_length); break; case -ENOENT: if (net_ratelimit()) netdev_dbg(netdev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); usb_free_coherent(urb->dev, es58x_dev->param->tx_urb_cmd_max_len, urb->transfer_buffer, urb->transfer_dma); return; default: if (net_ratelimit()) netdev_info(netdev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); break; } usb_anchor_urb(urb, &es58x_dev->tx_urbs_idle); atomic_inc(&es58x_dev->tx_urbs_idle_cnt); } /** * es58x_alloc_urb() - Allocate memory for an URB and its transfer * buffer. * @es58x_dev: ES58X device. * @urb: URB to be allocated. * @buf: used to return DMA address of buffer. * @buf_len: requested buffer size. * @mem_flags: affect whether allocation may block. * * Allocates an URB and its @transfer_buffer and set its @transfer_dma * address. * * This function is used at start-up to allocate all RX URBs at once * and during run time for TX URBs. * * Return: zero on success, -ENOMEM if no memory is available. 
*/ static int es58x_alloc_urb(struct es58x_device *es58x_dev, struct urb **urb, u8 **buf, size_t buf_len, gfp_t mem_flags) { *urb = usb_alloc_urb(0, mem_flags); if (!*urb) { dev_err(es58x_dev->dev, "No memory left for URBs\n"); return -ENOMEM; } *buf = usb_alloc_coherent(es58x_dev->udev, buf_len, mem_flags, &(*urb)->transfer_dma); if (!*buf) { dev_err(es58x_dev->dev, "No memory left for USB buffer\n"); usb_free_urb(*urb); return -ENOMEM; } (*urb)->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; return 0; } /** * es58x_get_tx_urb() - Get an URB for transmission. * @es58x_dev: ES58X device. * * Gets an URB from the idle urbs anchor or allocate a new one if the * anchor is empty. * * If there are more than ES58X_TX_URBS_MAX in the idle anchor, do * some garbage collection. The garbage collection is done here * instead of within es58x_write_bulk_callback() because * usb_free_coherent() should not be used in IRQ context: * c.f. WARN_ON(irqs_disabled()) in dma_free_attrs(). * * Return: a pointer to an URB on success, NULL if no memory is * available. */ static struct urb *es58x_get_tx_urb(struct es58x_device *es58x_dev) { atomic_t *idle_cnt = &es58x_dev->tx_urbs_idle_cnt; struct urb *urb = usb_get_from_anchor(&es58x_dev->tx_urbs_idle); if (!urb) { size_t tx_buf_len; u8 *buf; tx_buf_len = es58x_dev->param->tx_urb_cmd_max_len; if (es58x_alloc_urb(es58x_dev, &urb, &buf, tx_buf_len, GFP_ATOMIC)) return NULL; usb_fill_bulk_urb(urb, es58x_dev->udev, es58x_dev->tx_pipe, buf, tx_buf_len, es58x_write_bulk_callback, NULL); return urb; } while (atomic_dec_return(idle_cnt) > ES58X_TX_URBS_MAX) { /* Garbage collector */ struct urb *tmp = usb_get_from_anchor(&es58x_dev->tx_urbs_idle); if (!tmp) break; usb_free_coherent(tmp->dev, es58x_dev->param->tx_urb_cmd_max_len, tmp->transfer_buffer, tmp->transfer_dma); usb_free_urb(tmp); } return urb; } /** * es58x_submit_urb() - Send data to the device. * @es58x_dev: ES58X device. * @urb: URB to be sent. * @netdev: CAN network device. * * Return: zero on success, errno when any error occurs. */ static int es58x_submit_urb(struct es58x_device *es58x_dev, struct urb *urb, struct net_device *netdev) { int ret; es58x_set_crc(urb->transfer_buffer, urb->transfer_buffer_length); urb->context = netdev; usb_anchor_urb(urb, &es58x_dev->tx_urbs_busy); ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret) { netdev_err(netdev, "%s: USB send urb failure: %pe\n", __func__, ERR_PTR(ret)); usb_unanchor_urb(urb); usb_free_coherent(urb->dev, es58x_dev->param->tx_urb_cmd_max_len, urb->transfer_buffer, urb->transfer_dma); } usb_free_urb(urb); return ret; } /** * es58x_send_msg() - Prepare an URB and submit it. * @es58x_dev: ES58X device. * @cmd_type: Command type. * @cmd_id: Command ID. * @msg: ES58X message to be sent. * @msg_len: Length of @msg. * @channel_idx: Index of the network device. * * Creates an URB command from a given message, sets the header and the * CRC and then submits it. * * Return: zero on success, errno when any error occurs. 
*/ int es58x_send_msg(struct es58x_device *es58x_dev, u8 cmd_type, u8 cmd_id, const void *msg, u16 msg_len, int channel_idx) { struct net_device *netdev; union es58x_urb_cmd *urb_cmd; struct urb *urb; int urb_cmd_len; if (channel_idx == ES58X_CHANNEL_IDX_NA) netdev = es58x_dev->netdev[0]; /* Default to first channel */ else netdev = es58x_dev->netdev[channel_idx]; urb_cmd_len = es58x_get_urb_cmd_len(es58x_dev, msg_len); if (urb_cmd_len > es58x_dev->param->tx_urb_cmd_max_len) return -EOVERFLOW; urb = es58x_get_tx_urb(es58x_dev); if (!urb) return -ENOMEM; urb_cmd = urb->transfer_buffer; es58x_dev->ops->fill_urb_header(urb_cmd, cmd_type, cmd_id, channel_idx, msg_len); memcpy(&urb_cmd->raw_cmd[es58x_dev->param->urb_cmd_header_len], msg, msg_len); urb->transfer_buffer_length = urb_cmd_len; return es58x_submit_urb(es58x_dev, urb, netdev); } /** * es58x_alloc_rx_urbs() - Allocate RX URBs. * @es58x_dev: ES58X device. * * Allocate URBs for reception and anchor them. * * Return: zero on success, errno when any error occurs. */ static int es58x_alloc_rx_urbs(struct es58x_device *es58x_dev) { const struct device *dev = es58x_dev->dev; const struct es58x_parameters *param = es58x_dev->param; u16 rx_buf_len = usb_maxpacket(es58x_dev->udev, es58x_dev->rx_pipe); struct urb *urb; u8 *buf; int i; int ret = -EINVAL; for (i = 0; i < param->rx_urb_max; i++) { ret = es58x_alloc_urb(es58x_dev, &urb, &buf, rx_buf_len, GFP_KERNEL); if (ret) break; usb_fill_bulk_urb(urb, es58x_dev->udev, es58x_dev->rx_pipe, buf, rx_buf_len, es58x_read_bulk_callback, es58x_dev); usb_anchor_urb(urb, &es58x_dev->rx_urbs); ret = usb_submit_urb(urb, GFP_KERNEL); if (ret) { usb_unanchor_urb(urb); usb_free_coherent(es58x_dev->udev, rx_buf_len, buf, urb->transfer_dma); usb_free_urb(urb); break; } usb_free_urb(urb); } if (i == 0) { dev_err(dev, "%s: Could not setup any rx URBs\n", __func__); return ret; } dev_dbg(dev, "%s: Allocated %d rx URBs each of size %u\n", __func__, i, rx_buf_len); return ret; } /** * es58x_free_urbs() - Free all the TX and RX URBs. * @es58x_dev: ES58X device. */ static void es58x_free_urbs(struct es58x_device *es58x_dev) { struct urb *urb; if (!usb_wait_anchor_empty_timeout(&es58x_dev->tx_urbs_busy, 1000)) { dev_err(es58x_dev->dev, "%s: Timeout, some TX urbs still remain\n", __func__); usb_kill_anchored_urbs(&es58x_dev->tx_urbs_busy); } while ((urb = usb_get_from_anchor(&es58x_dev->tx_urbs_idle)) != NULL) { usb_free_coherent(urb->dev, es58x_dev->param->tx_urb_cmd_max_len, urb->transfer_buffer, urb->transfer_dma); usb_free_urb(urb); atomic_dec(&es58x_dev->tx_urbs_idle_cnt); } if (atomic_read(&es58x_dev->tx_urbs_idle_cnt)) dev_err(es58x_dev->dev, "All idle urbs were freed but tx_urb_idle_cnt is %d\n", atomic_read(&es58x_dev->tx_urbs_idle_cnt)); usb_kill_anchored_urbs(&es58x_dev->rx_urbs); } /** * es58x_open() - Enable the network device. * @netdev: CAN network device. * * Called when the network transitions to the up state. Allocate the * URB resources if needed and open the channel. * * Return: zero on success, errno when any error occurs. 
*/ static int es58x_open(struct net_device *netdev) { struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev; int ret; if (!es58x_dev->opened_channel_cnt) { ret = es58x_alloc_rx_urbs(es58x_dev); if (ret) return ret; ret = es58x_set_realtime_diff_ns(es58x_dev); if (ret) goto free_urbs; } ret = open_candev(netdev); if (ret) goto free_urbs; ret = es58x_dev->ops->enable_channel(es58x_priv(netdev)); if (ret) goto free_urbs; es58x_dev->opened_channel_cnt++; netif_start_queue(netdev); return ret; free_urbs: if (!es58x_dev->opened_channel_cnt) es58x_free_urbs(es58x_dev); netdev_err(netdev, "%s: Could not open the network device: %pe\n", __func__, ERR_PTR(ret)); return ret; } /** * es58x_stop() - Disable the network device. * @netdev: CAN network device. * * Called when the network transitions to the down state. If all the * channels of the device are closed, free the URB resources which are * not needed anymore. * * Return: zero on success, errno when any error occurs. */ static int es58x_stop(struct net_device *netdev) { struct es58x_priv *priv = es58x_priv(netdev); struct es58x_device *es58x_dev = priv->es58x_dev; int ret; netif_stop_queue(netdev); ret = es58x_dev->ops->disable_channel(priv); if (ret) return ret; priv->can.state = CAN_STATE_STOPPED; es58x_can_reset_echo_fifo(netdev); close_candev(netdev); es58x_flush_pending_tx_msg(netdev); es58x_dev->opened_channel_cnt--; if (!es58x_dev->opened_channel_cnt) es58x_free_urbs(es58x_dev); return 0; } /** * es58x_xmit_commit() - Send the bulk urb. * @netdev: CAN network device. * * Do the bulk send. This function should be called only once by bulk * transmission. * * Return: zero on success, errno when any error occurs. */ static int es58x_xmit_commit(struct net_device *netdev) { struct es58x_priv *priv = es58x_priv(netdev); int ret; if (!es58x_is_can_state_active(netdev)) return -ENETDOWN; if (es58x_is_echo_skb_threshold_reached(priv)) netif_stop_queue(netdev); ret = es58x_submit_urb(priv->es58x_dev, priv->tx_urb, netdev); if (ret == 0) priv->tx_urb = NULL; return ret; } /** * es58x_xmit_more() - Can we put more packets? * @priv: ES58X private parameters related to the network device. * * Return: true if we can put more, false if it is time to send. */ static bool es58x_xmit_more(struct es58x_priv *priv) { unsigned int free_slots = priv->can.echo_skb_max - (priv->tx_head - priv->tx_tail); return netdev_xmit_more() && free_slots > 0 && priv->tx_can_msg_cnt < priv->es58x_dev->param->tx_bulk_max; } /** * es58x_start_xmit() - Transmit an skb. * @skb: socket buffer of a CAN message. * @netdev: CAN network device. * * Called when a packet needs to be transmitted. * * This function relies on Byte Queue Limits (BQL). The main benefit * is to increase the throughput by allowing bulk transfers * (c.f. xmit_more flag). * * Queues up to tx_bulk_max messages in &tx_urb buffer and does * a bulk send of all messages in one single URB. * * Return: NETDEV_TX_OK regardless of if we could transmit the @skb or * had to drop it. */ static netdev_tx_t es58x_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct es58x_priv *priv = es58x_priv(netdev); struct es58x_device *es58x_dev = priv->es58x_dev; unsigned int frame_len; int ret; if (can_dev_dropped_skb(netdev, skb)) { if (priv->tx_urb) goto xmit_commit; return NETDEV_TX_OK; } if (priv->tx_urb && priv->tx_can_msg_is_fd != can_is_canfd_skb(skb)) { /* Can not do bulk send with mixed CAN and CAN FD frames. 
*/ ret = es58x_xmit_commit(netdev); if (ret) goto drop_skb; } if (!priv->tx_urb) { priv->tx_urb = es58x_get_tx_urb(es58x_dev); if (!priv->tx_urb) { ret = -ENOMEM; goto drop_skb; } priv->tx_can_msg_cnt = 0; priv->tx_can_msg_is_fd = can_is_canfd_skb(skb); } ret = es58x_dev->ops->tx_can_msg(priv, skb); if (ret) goto drop_skb; frame_len = can_skb_get_frame_len(skb); ret = can_put_echo_skb(skb, netdev, priv->tx_head & es58x_dev->param->fifo_mask, frame_len); if (ret) goto xmit_failure; netdev_sent_queue(netdev, frame_len); priv->tx_head++; priv->tx_can_msg_cnt++; xmit_commit: if (!es58x_xmit_more(priv)) { ret = es58x_xmit_commit(netdev); if (ret) goto xmit_failure; } return NETDEV_TX_OK; drop_skb: dev_kfree_skb(skb); netdev->stats.tx_dropped++; xmit_failure: netdev_warn(netdev, "%s: send message failure: %pe\n", __func__, ERR_PTR(ret)); netdev->stats.tx_errors++; es58x_flush_pending_tx_msg(netdev); return NETDEV_TX_OK; } static const struct net_device_ops es58x_netdev_ops = { .ndo_open = es58x_open, .ndo_stop = es58x_stop, .ndo_start_xmit = es58x_start_xmit, .ndo_eth_ioctl = can_eth_ioctl_hwts, }; static const struct ethtool_ops es58x_ethtool_ops = { .get_ts_info = can_ethtool_op_get_ts_info_hwts, }; /** * es58x_set_mode() - Change network device mode. * @netdev: CAN network device. * @mode: either %CAN_MODE_START, %CAN_MODE_STOP or %CAN_MODE_SLEEP * * Currently, this function is only used to stop and restart the * channel during a bus off event (c.f. es58x_rx_err_msg() and * drivers/net/can/dev.c:can_restart() which are the two only * callers). * * Return: zero on success, errno when any error occurs. */ static int es58x_set_mode(struct net_device *netdev, enum can_mode mode) { struct es58x_priv *priv = es58x_priv(netdev); switch (mode) { case CAN_MODE_START: switch (priv->can.state) { case CAN_STATE_BUS_OFF: return priv->es58x_dev->ops->enable_channel(priv); case CAN_STATE_STOPPED: return es58x_open(netdev); case CAN_STATE_ERROR_ACTIVE: case CAN_STATE_ERROR_WARNING: case CAN_STATE_ERROR_PASSIVE: default: return 0; } case CAN_MODE_STOP: switch (priv->can.state) { case CAN_STATE_STOPPED: return 0; case CAN_STATE_ERROR_ACTIVE: case CAN_STATE_ERROR_WARNING: case CAN_STATE_ERROR_PASSIVE: case CAN_STATE_BUS_OFF: default: return priv->es58x_dev->ops->disable_channel(priv); } case CAN_MODE_SLEEP: default: return -EOPNOTSUPP; } } /** * es58x_init_priv() - Initialize private parameters. * @es58x_dev: ES58X device. * @priv: ES58X private parameters related to the network device. * @channel_idx: Index of the network device. * * Return: zero on success, errno if devlink port could not be * properly registered. 
*/ static int es58x_init_priv(struct es58x_device *es58x_dev, struct es58x_priv *priv, int channel_idx) { struct devlink_port_attrs attrs = { .flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL, }; const struct es58x_parameters *param = es58x_dev->param; struct can_priv *can = &priv->can; priv->es58x_dev = es58x_dev; priv->channel_idx = channel_idx; priv->tx_urb = NULL; priv->tx_can_msg_cnt = 0; can->bittiming_const = param->bittiming_const; if (param->ctrlmode_supported & CAN_CTRLMODE_FD) { can->fd.data_bittiming_const = param->data_bittiming_const; can->fd.tdc_const = param->tdc_const; } can->bitrate_max = param->bitrate_max; can->clock = param->clock; can->state = CAN_STATE_STOPPED; can->ctrlmode_supported = param->ctrlmode_supported; can->do_set_mode = es58x_set_mode; devlink_port_attrs_set(&priv->devlink_port, &attrs); return devlink_port_register(priv_to_devlink(es58x_dev), &priv->devlink_port, channel_idx); } /** * es58x_init_netdev() - Initialize the network device. * @es58x_dev: ES58X device. * @channel_idx: Index of the network device. * * Return: zero on success, errno when any error occurs. */ static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx) { struct net_device *netdev; struct device *dev = es58x_dev->dev; int ret; netdev = alloc_candev(sizeof(struct es58x_priv), es58x_dev->param->fifo_mask + 1); if (!netdev) { dev_err(dev, "Could not allocate candev\n"); return -ENOMEM; } SET_NETDEV_DEV(netdev, dev); es58x_dev->netdev[channel_idx] = netdev; ret = es58x_init_priv(es58x_dev, es58x_priv(netdev), channel_idx); if (ret) goto free_candev; SET_NETDEV_DEVLINK_PORT(netdev, &es58x_priv(netdev)->devlink_port); netdev->netdev_ops = &es58x_netdev_ops; netdev->ethtool_ops = &es58x_ethtool_ops; netdev->flags |= IFF_ECHO; /* We support local echo */ netdev->dev_port = channel_idx; ret = register_candev(netdev); if (ret) goto devlink_port_unregister; netdev_queue_set_dql_min_limit(netdev_get_tx_queue(netdev, 0), es58x_dev->param->dql_min_limit); return ret; devlink_port_unregister: devlink_port_unregister(&es58x_priv(netdev)->devlink_port); free_candev: es58x_dev->netdev[channel_idx] = NULL; free_candev(netdev); return ret; } /** * es58x_free_netdevs() - Release all network resources of the device. * @es58x_dev: ES58X device. */ static void es58x_free_netdevs(struct es58x_device *es58x_dev) { int i; for (i = 0; i < es58x_dev->num_can_ch; i++) { struct net_device *netdev = es58x_dev->netdev[i]; if (!netdev) continue; unregister_candev(netdev); devlink_port_unregister(&es58x_priv(netdev)->devlink_port); es58x_dev->netdev[i] = NULL; free_candev(netdev); } } /** * es58x_init_es58x_dev() - Initialize the ES58X device. * @intf: USB interface. * @driver_info: Quirks of the device. * * Return: pointer to an ES58X device on success, error pointer when * any error occurs. 
*/ static struct es58x_device *es58x_init_es58x_dev(struct usb_interface *intf, kernel_ulong_t driver_info) { struct device *dev = &intf->dev; struct es58x_device *es58x_dev; struct devlink *devlink; const struct es58x_parameters *param; const struct es58x_operators *ops; struct usb_device *udev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *ep_in, *ep_out; int ret; dev_info(dev, "Starting %s %s (Serial Number %s)\n", udev->manufacturer, udev->product, udev->serial); ret = usb_find_common_endpoints(intf->cur_altsetting, &ep_in, &ep_out, NULL, NULL); if (ret) return ERR_PTR(ret); if (driver_info & ES58X_FD_FAMILY) { param = &es58x_fd_param; ops = &es58x_fd_ops; } else { param = &es581_4_param; ops = &es581_4_ops; } devlink = devlink_alloc(&es58x_dl_ops, es58x_sizeof_es58x_device(param), dev); if (!devlink) return ERR_PTR(-ENOMEM); es58x_dev = devlink_priv(devlink); es58x_dev->param = param; es58x_dev->ops = ops; es58x_dev->dev = dev; es58x_dev->udev = udev; if (driver_info & ES58X_DUAL_CHANNEL) es58x_dev->num_can_ch = 2; else es58x_dev->num_can_ch = 1; init_usb_anchor(&es58x_dev->rx_urbs); init_usb_anchor(&es58x_dev->tx_urbs_idle); init_usb_anchor(&es58x_dev->tx_urbs_busy); atomic_set(&es58x_dev->tx_urbs_idle_cnt, 0); usb_set_intfdata(intf, es58x_dev); es58x_dev->rx_pipe = usb_rcvbulkpipe(es58x_dev->udev, ep_in->bEndpointAddress); es58x_dev->tx_pipe = usb_sndbulkpipe(es58x_dev->udev, ep_out->bEndpointAddress); return es58x_dev; } /** * es58x_probe() - Initialize the USB device. * @intf: USB interface. * @id: USB device ID. * * Return: zero on success, -ENODEV if the interface is not supported * or errno when any other error occurs. */ static int es58x_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct es58x_device *es58x_dev; int ch_idx; es58x_dev = es58x_init_es58x_dev(intf, id->driver_info); if (IS_ERR(es58x_dev)) return PTR_ERR(es58x_dev); es58x_parse_product_info(es58x_dev); devlink_register(priv_to_devlink(es58x_dev)); for (ch_idx = 0; ch_idx < es58x_dev->num_can_ch; ch_idx++) { int ret = es58x_init_netdev(es58x_dev, ch_idx); if (ret) { es58x_free_netdevs(es58x_dev); return ret; } } return 0; } /** * es58x_disconnect() - Disconnect the USB device. * @intf: USB interface * * Called by the usb core when driver is unloaded or device is * removed. */ static void es58x_disconnect(struct usb_interface *intf) { struct es58x_device *es58x_dev = usb_get_intfdata(intf); dev_info(&intf->dev, "Disconnecting %s %s\n", es58x_dev->udev->manufacturer, es58x_dev->udev->product); devlink_unregister(priv_to_devlink(es58x_dev)); es58x_free_netdevs(es58x_dev); es58x_free_urbs(es58x_dev); devlink_free(priv_to_devlink(es58x_dev)); usb_set_intfdata(intf, NULL); } static struct usb_driver es58x_driver = { .name = KBUILD_MODNAME, .probe = es58x_probe, .disconnect = es58x_disconnect, .id_table = es58x_id_table }; module_usb_driver(es58x_driver); |
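/*
 * Illustrative sketch, not part of the driver above: how the error frames
 * queued by es58x_rx_err_msg() (CAN_ERR_FLAG set, CAN_ERR_CRTL/PROT/TRX
 * details in can_id and data[]) can be observed from user space through a
 * SocketCAN raw socket. The interface name "can0" and the helper name
 * read_error_frames_example() are assumptions made for this example only.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/can.h>
#include <linux/can/raw.h>
#include <linux/can/error.h>

static int read_error_frames_example(void)
{
	struct sockaddr_can addr = { .can_family = AF_CAN };
	struct can_frame frame;
	struct ifreq ifr;
	can_err_mask_t err_mask = CAN_ERR_MASK;	/* report every error class */
	int sock;

	sock = socket(PF_CAN, SOCK_RAW, CAN_RAW);
	if (sock < 0)
		return -1;

	/* Error frames are only delivered if the socket asks for them. */
	setsockopt(sock, SOL_CAN_RAW, CAN_RAW_ERR_FILTER,
		   &err_mask, sizeof(err_mask));

	strncpy(ifr.ifr_name, "can0", sizeof(ifr.ifr_name));
	ioctl(sock, SIOCGIFINDEX, &ifr);
	addr.can_ifindex = ifr.ifr_ifindex;
	bind(sock, (struct sockaddr *)&addr, sizeof(addr));

	while (read(sock, &frame, sizeof(frame)) == (ssize_t)sizeof(frame)) {
		if (!(frame.can_id & CAN_ERR_FLAG))
			continue;	/* regular data frame */
		if (frame.can_id & CAN_ERR_BUSOFF)
			printf("bus-off reported\n");
		if (frame.can_id & CAN_ERR_CRTL)
			printf("controller state change, data[1]=0x%02x\n",
			       frame.data[1]);
		if (frame.can_id & CAN_ERR_PROT)
			printf("protocol violation, data[2]=0x%02x data[3]=0x%02x\n",
			       frame.data[2], frame.data[3]);
	}

	close(sock);
	return 0;
}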
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __SOCK_DIAG_H__
#define __SOCK_DIAG_H__

#include <linux/netlink.h>
#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>

struct sk_buff;
struct nlmsghdr;
struct sock;

struct sock_diag_handler {
	struct module *owner;
	__u8 family;
	int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
	int (*get_info)(struct sk_buff *skb, struct sock *sk);
	int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh);
};
int sock_diag_register(const struct sock_diag_handler *h);
void sock_diag_unregister(const struct sock_diag_handler *h);

struct sock_diag_inet_compat {
	struct module *owner;
	int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh);
};

void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr);
void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr);

u64 __sock_gen_cookie(struct sock *sk);

static inline u64 sock_gen_cookie(struct sock *sk)
{
	u64 cookie;

	preempt_disable();
	cookie = __sock_gen_cookie(sk);
	preempt_enable();

	return cookie;
}

int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie);
void sock_diag_save_cookie(struct sock *sk, __u32 *cookie);

int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);

int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
			     struct sk_buff *skb, int attrtype);

static inline enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk)
{
	switch (sk->sk_family) {
	case AF_INET:
		if (sk->sk_type == SOCK_RAW)
			return SKNLGRP_NONE;

		switch (sk->sk_protocol) {
		case IPPROTO_TCP:
			return SKNLGRP_INET_TCP_DESTROY;
		case IPPROTO_UDP:
			return SKNLGRP_INET_UDP_DESTROY;
		default:
			return SKNLGRP_NONE;
		}
	case AF_INET6:
		if (sk->sk_type == SOCK_RAW)
			return SKNLGRP_NONE;

		switch (sk->sk_protocol) {
		case IPPROTO_TCP:
			return SKNLGRP_INET6_TCP_DESTROY;
		case IPPROTO_UDP:
			return SKNLGRP_INET6_UDP_DESTROY;
		default:
			return SKNLGRP_NONE;
		}
	default:
		return SKNLGRP_NONE;
	}
}

static inline bool sock_diag_has_destroy_listeners(const struct sock *sk)
{
	const struct net *n = sock_net(sk);
	const enum sknetlink_groups group = sock_diag_destroy_group(sk);

	return group != SKNLGRP_NONE && n->diag_nlsk &&
		netlink_has_listeners(n->diag_nlsk, group);
}
void sock_diag_broadcast_destroy(struct sock *sk);

int sock_diag_destroy(struct sock *sk, int err);
#endif
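/*
 * Illustrative sketch, not part of the header above: the registration
 * pattern a protocol diag module is expected to follow with the
 * declarations from <linux/sock_diag.h>. The "example_diag" names are
 * placeholders, and AF_UNIX is used as the family purely for illustration
 * (the real unix_diag module already owns that slot).
 */
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>

static int example_diag_dump(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	/* Parse the SOCK_DIAG_BY_FAMILY request in @nlh and reply with a
	 * netlink dump of the matching sockets.
	 */
	return 0;
}

static const struct sock_diag_handler example_diag_handler = {
	.owner	= THIS_MODULE,
	.family	= AF_UNIX,	/* placeholder family for this sketch */
	.dump	= example_diag_dump,
};

static int __init example_diag_init(void)
{
	/* Hook this family into the NETLINK_SOCK_DIAG multiplexer. */
	return sock_diag_register(&example_diag_handler);
}

static void __exit example_diag_exit(void)
{
	sock_diag_unregister(&example_diag_handler);
}

module_init(example_diag_init);
module_exit(example_diag_exit);
MODULE_LICENSE("GPL");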
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Yama Linux Security Module
 *
 * Author: Kees Cook <keescook@chromium.org>
 *
 * Copyright (C) 2010 Canonical, Ltd.
 * Copyright (C) 2011 The Chromium OS Authors.
*/ #include <linux/lsm_hooks.h> #include <linux/sysctl.h> #include <linux/ptrace.h> #include <linux/prctl.h> #include <linux/ratelimit.h> #include <linux/workqueue.h> #include <linux/string_helpers.h> #include <linux/task_work.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <uapi/linux/lsm.h> #define YAMA_SCOPE_DISABLED 0 #define YAMA_SCOPE_RELATIONAL 1 #define YAMA_SCOPE_CAPABILITY 2 #define YAMA_SCOPE_NO_ATTACH 3 static int ptrace_scope = YAMA_SCOPE_RELATIONAL; /* describe a ptrace relationship for potential exception */ struct ptrace_relation { struct task_struct *tracer; struct task_struct *tracee; bool invalid; struct list_head node; struct rcu_head rcu; }; static LIST_HEAD(ptracer_relations); static DEFINE_SPINLOCK(ptracer_relations_lock); static void yama_relation_cleanup(struct work_struct *work); static DECLARE_WORK(yama_relation_work, yama_relation_cleanup); struct access_report_info { struct callback_head work; const char *access; struct task_struct *target; struct task_struct *agent; }; static void __report_access(struct callback_head *work) { struct access_report_info *info = container_of(work, struct access_report_info, work); char *target_cmd, *agent_cmd; target_cmd = kstrdup_quotable_cmdline(info->target, GFP_KERNEL); agent_cmd = kstrdup_quotable_cmdline(info->agent, GFP_KERNEL); pr_notice_ratelimited( "ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n", info->access, target_cmd, info->target->pid, agent_cmd, info->agent->pid); kfree(agent_cmd); kfree(target_cmd); put_task_struct(info->agent); put_task_struct(info->target); kfree(info); } /* defers execution because cmdline access can sleep */ static void report_access(const char *access, struct task_struct *target, struct task_struct *agent) { struct access_report_info *info; assert_spin_locked(&target->alloc_lock); /* for target->comm */ if (current->flags & PF_KTHREAD) { /* I don't think kthreads call task_work_run() before exiting. * Imagine angry ranting about procfs here. */ pr_notice_ratelimited( "ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n", access, target->comm, target->pid, agent->comm, agent->pid); return; } info = kmalloc(sizeof(*info), GFP_ATOMIC); if (!info) return; init_task_work(&info->work, __report_access); get_task_struct(target); get_task_struct(agent); info->access = access; info->target = target; info->agent = agent; if (task_work_add(current, &info->work, TWA_RESUME) == 0) return; /* success */ WARN(1, "report_access called from exiting task"); put_task_struct(target); put_task_struct(agent); kfree(info); } /** * yama_relation_cleanup - remove invalid entries from the relation list * @work: unused * */ static void yama_relation_cleanup(struct work_struct *work) { struct ptrace_relation *relation; spin_lock(&ptracer_relations_lock); rcu_read_lock(); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) { list_del_rcu(&relation->node); kfree_rcu(relation, rcu); } } rcu_read_unlock(); spin_unlock(&ptracer_relations_lock); } /** * yama_ptracer_add - add/replace an exception for this tracer/tracee pair * @tracer: the task_struct of the process doing the ptrace * @tracee: the task_struct of the process to be ptraced * * Each tracee can have, at most, one tracer registered. Each time this * is called, the prior registered tracer will be replaced for the tracee. * * Returns 0 if relationship was added, -ve on error. 
*/ static int yama_ptracer_add(struct task_struct *tracer, struct task_struct *tracee) { struct ptrace_relation *relation, *added; added = kmalloc(sizeof(*added), GFP_KERNEL); if (!added) return -ENOMEM; added->tracee = tracee; added->tracer = tracer; added->invalid = false; spin_lock(&ptracer_relations_lock); rcu_read_lock(); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) continue; if (relation->tracee == tracee) { list_replace_rcu(&relation->node, &added->node); kfree_rcu(relation, rcu); goto out; } } list_add_rcu(&added->node, &ptracer_relations); out: rcu_read_unlock(); spin_unlock(&ptracer_relations_lock); return 0; } /** * yama_ptracer_del - remove exceptions related to the given tasks * @tracer: remove any relation where tracer task matches * @tracee: remove any relation where tracee task matches */ static void yama_ptracer_del(struct task_struct *tracer, struct task_struct *tracee) { struct ptrace_relation *relation; bool marked = false; rcu_read_lock(); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) continue; if (relation->tracee == tracee || (tracer && relation->tracer == tracer)) { relation->invalid = true; marked = true; } } rcu_read_unlock(); if (marked) schedule_work(&yama_relation_work); } /** * yama_task_free - check for task_pid to remove from exception list * @task: task being removed */ static void yama_task_free(struct task_struct *task) { yama_ptracer_del(task, task); } /** * yama_task_prctl - check for Yama-specific prctl operations * @option: operation * @arg2: argument * @arg3: argument * @arg4: argument * @arg5: argument * * Return 0 on success, -ve on error. -ENOSYS is returned when Yama * does not handle the given option. */ static int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { int rc = -ENOSYS; struct task_struct *myself; switch (option) { case PR_SET_PTRACER: /* Since a thread can call prctl(), find the group leader * before calling _add() or _del() on it, since we want * process-level granularity of control. The tracer group * leader checking is handled later when walking the ancestry * at the time of PTRACE_ATTACH check. */ myself = current->group_leader; if (arg2 == 0) { yama_ptracer_del(NULL, myself); rc = 0; } else if (arg2 == PR_SET_PTRACER_ANY || (int)arg2 == -1) { rc = yama_ptracer_add(NULL, myself); } else { struct task_struct *tracer; tracer = find_get_task_by_vpid(arg2); if (!tracer) { rc = -EINVAL; } else { rc = yama_ptracer_add(tracer, myself); put_task_struct(tracer); } } break; } return rc; } /** * task_is_descendant - walk up a process family tree looking for a match * @parent: the process to compare against while walking up from child * @child: the process to start from while looking upwards for parent * * Returns 1 if child is a descendant of parent, 0 if not. 
*/ static int task_is_descendant(struct task_struct *parent, struct task_struct *child) { int rc = 0; struct task_struct *walker = child; if (!parent || !child) return 0; rcu_read_lock(); if (!thread_group_leader(parent)) parent = rcu_dereference(parent->group_leader); while (walker->pid > 0) { if (!thread_group_leader(walker)) walker = rcu_dereference(walker->group_leader); if (walker == parent) { rc = 1; break; } walker = rcu_dereference(walker->real_parent); } rcu_read_unlock(); return rc; } /** * ptracer_exception_found - tracer registered as exception for this tracee * @tracer: the task_struct of the process attempting ptrace * @tracee: the task_struct of the process to be ptraced * * Returns 1 if tracer has a ptracer exception ancestor for tracee. */ static int ptracer_exception_found(struct task_struct *tracer, struct task_struct *tracee) { int rc = 0; struct ptrace_relation *relation; struct task_struct *parent = NULL; bool found = false; rcu_read_lock(); /* * If there's already an active tracing relationship, then make an * exception for the sake of other accesses, like process_vm_rw(). */ parent = ptrace_parent(tracee); if (parent != NULL && same_thread_group(parent, tracer)) { rc = 1; goto unlock; } /* Look for a PR_SET_PTRACER relationship. */ if (!thread_group_leader(tracee)) tracee = rcu_dereference(tracee->group_leader); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) continue; if (relation->tracee == tracee) { parent = relation->tracer; found = true; break; } } if (found && (parent == NULL || task_is_descendant(parent, tracer))) rc = 1; unlock: rcu_read_unlock(); return rc; } /** * yama_ptrace_access_check - validate PTRACE_ATTACH calls * @child: task that current task is attempting to ptrace * @mode: ptrace attach mode * * Returns 0 if following the ptrace is allowed, -ve on error. */ static int yama_ptrace_access_check(struct task_struct *child, unsigned int mode) { int rc = 0; /* require ptrace target be a child of ptracer on attach */ if (mode & PTRACE_MODE_ATTACH) { switch (ptrace_scope) { case YAMA_SCOPE_DISABLED: /* No additional restrictions. */ break; case YAMA_SCOPE_RELATIONAL: rcu_read_lock(); if (!pid_alive(child)) rc = -EPERM; if (!rc && !task_is_descendant(current, child) && !ptracer_exception_found(current, child) && !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE)) rc = -EPERM; rcu_read_unlock(); break; case YAMA_SCOPE_CAPABILITY: rcu_read_lock(); if (!ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE)) rc = -EPERM; rcu_read_unlock(); break; case YAMA_SCOPE_NO_ATTACH: default: rc = -EPERM; break; } } if (rc && (mode & PTRACE_MODE_NOAUDIT) == 0) report_access("attach", child, current); return rc; } /** * yama_ptrace_traceme - validate PTRACE_TRACEME calls * @parent: task that will become the ptracer of the current task * * Returns 0 if following the ptrace is allowed, -ve on error. */ static int yama_ptrace_traceme(struct task_struct *parent) { int rc = 0; /* Only disallow PTRACE_TRACEME on more aggressive settings. 
*/ switch (ptrace_scope) { case YAMA_SCOPE_CAPABILITY: if (!has_ns_capability(parent, current_user_ns(), CAP_SYS_PTRACE)) rc = -EPERM; break; case YAMA_SCOPE_NO_ATTACH: rc = -EPERM; break; } if (rc) { task_lock(current); report_access("traceme", current, parent); task_unlock(current); } return rc; } static const struct lsm_id yama_lsmid = { .name = "yama", .id = LSM_ID_YAMA, }; static struct security_hook_list yama_hooks[] __ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, yama_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, yama_ptrace_traceme), LSM_HOOK_INIT(task_prctl, yama_task_prctl), LSM_HOOK_INIT(task_free, yama_task_free), }; #ifdef CONFIG_SYSCTL static int yama_dointvec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table table_copy; if (write && !capable(CAP_SYS_PTRACE)) return -EPERM; /* Lock the max value if it ever gets set. */ table_copy = *table; if (*(int *)table_copy.data == *(int *)table_copy.extra2) table_copy.extra1 = table_copy.extra2; return proc_dointvec_minmax(&table_copy, write, buffer, lenp, ppos); } static int max_scope = YAMA_SCOPE_NO_ATTACH; static const struct ctl_table yama_sysctl_table[] = { { .procname = "ptrace_scope", .data = &ptrace_scope, .maxlen = sizeof(int), .mode = 0644, .proc_handler = yama_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &max_scope, }, }; static void __init yama_init_sysctl(void) { if (!register_sysctl("kernel/yama", yama_sysctl_table)) panic("Yama: sysctl registration failed.\n"); } #else static inline void yama_init_sysctl(void) { } #endif /* CONFIG_SYSCTL */ static int __init yama_init(void) { pr_info("Yama: becoming mindful.\n"); security_add_hooks(yama_hooks, ARRAY_SIZE(yama_hooks), &yama_lsmid); yama_init_sysctl(); return 0; } DEFINE_LSM(yama) = { .name = "yama", .init = yama_init, }; |
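/*
 * Illustrative sketch, not part of the LSM above: the user-space side of the
 * PR_SET_PTRACER handling in yama_task_prctl(). With ptrace_scope set to 1
 * (YAMA_SCOPE_RELATIONAL), a process can register one specific helper that
 * is allowed to attach to it. The function name and "crash_handler_pid" are
 * assumptions made for this example only.
 */
#include <stdio.h>
#include <sys/prctl.h>
#include <sys/types.h>

static int allow_tracer_example(pid_t crash_handler_pid)
{
	/* Allow crash_handler_pid (and its descendants) to PTRACE_ATTACH to
	 * this process; everybody else stays restricted by Yama.
	 */
	if (prctl(PR_SET_PTRACER, crash_handler_pid, 0, 0, 0) == -1) {
		perror("prctl(PR_SET_PTRACER)");
		return -1;
	}

	/* PR_SET_PTRACER_ANY would lift the restriction for any tracer, and
	 * an argument of 0 clears the exception again (see yama_task_prctl()).
	 */
	return 0;
}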
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SWAPOPS_H
#define _LINUX_SWAPOPS_H

#include <linux/radix-tree.h>
#include <linux/bug.h>
#include <linux/mm_types.h>

#ifdef CONFIG_MMU

#ifdef CONFIG_SWAP
#include <linux/swapfile.h>
#endif	/* CONFIG_SWAP */

/*
 * swapcache pages are stored in the swapper_space radix tree. We want to
 * get good packing density in that tree, so the index should be dense in
 * the low-order bits.
 *
 * We arrange the `type' and `offset' fields so that `type' is at the five
 * high-order bits of the swp_entry_t and `offset' is right-aligned in the
 * remaining bits. Although `type' itself needs only five bits, we allow for
 * shmem/tmpfs to shift it all up a further one bit: see swp_to_radix_entry().
 *
 * swp_entry_t's are *never* stored anywhere in their arch-dependent format.
 */
#define SWP_TYPE_SHIFT	(BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT)
#define SWP_OFFSET_MASK	((1UL << SWP_TYPE_SHIFT) - 1)

/*
 * Definitions only for PFN swap entries (see is_pfn_swap_entry()). To
 * store PFN, we only need SWP_PFN_BITS bits.
Each of the pfn swap entries * can use the extra bits to store other information besides PFN. */ #ifdef MAX_PHYSMEM_BITS #define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) #else /* MAX_PHYSMEM_BITS */ #define SWP_PFN_BITS min_t(int, \ sizeof(phys_addr_t) * 8 - PAGE_SHIFT, \ SWP_TYPE_SHIFT) #endif /* MAX_PHYSMEM_BITS */ #define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1) /** * Migration swap entry specific bitfield definitions. Layout: * * |----------+--------------------| * | swp_type | swp_offset | * |----------+--------+-+-+-------| * | | resv |D|A| PFN | * |----------+--------+-+-+-------| * * @SWP_MIG_YOUNG_BIT: Whether the page used to have young bit set (bit A) * @SWP_MIG_DIRTY_BIT: Whether the page used to have dirty bit set (bit D) * * Note: A/D bits will be stored in migration entries iff there're enough * free bits in arch specific swp offset. By default we'll ignore A/D bits * when migrating a page. Please refer to migration_entry_supports_ad() * for more information. If there're more bits besides PFN and A/D bits, * they should be reserved and always be zeros. */ #define SWP_MIG_YOUNG_BIT (SWP_PFN_BITS) #define SWP_MIG_DIRTY_BIT (SWP_PFN_BITS + 1) #define SWP_MIG_TOTAL_BITS (SWP_PFN_BITS + 2) #define SWP_MIG_YOUNG BIT(SWP_MIG_YOUNG_BIT) #define SWP_MIG_DIRTY BIT(SWP_MIG_DIRTY_BIT) static inline bool is_pfn_swap_entry(swp_entry_t entry); /* Clear all flags but only keep swp_entry_t related information */ static inline pte_t pte_swp_clear_flags(pte_t pte) { if (pte_swp_exclusive(pte)) pte = pte_swp_clear_exclusive(pte); if (pte_swp_soft_dirty(pte)) pte = pte_swp_clear_soft_dirty(pte); if (pte_swp_uffd_wp(pte)) pte = pte_swp_clear_uffd_wp(pte); return pte; } /* * Store a type+offset into a swp_entry_t in an arch-independent format */ static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset) { swp_entry_t ret; ret.val = (type << SWP_TYPE_SHIFT) | (offset & SWP_OFFSET_MASK); return ret; } /* * Extract the `type' field from a swp_entry_t. The swp_entry_t is in * arch-independent format */ static inline unsigned swp_type(swp_entry_t entry) { return (entry.val >> SWP_TYPE_SHIFT); } /* * Extract the `offset' field from a swp_entry_t. The swp_entry_t is in * arch-independent format */ static inline pgoff_t swp_offset(swp_entry_t entry) { return entry.val & SWP_OFFSET_MASK; } /* * This should only be called upon a pfn swap entry to get the PFN stored * in the swap entry. Please refers to is_pfn_swap_entry() for definition * of pfn swap entry. */ static inline unsigned long swp_offset_pfn(swp_entry_t entry) { VM_BUG_ON(!is_pfn_swap_entry(entry)); return swp_offset(entry) & SWP_PFN_MASK; } /* check whether a pte points to a swap entry */ static inline int is_swap_pte(pte_t pte) { return !pte_none(pte) && !pte_present(pte); } /* * Convert the arch-dependent pte representation of a swp_entry_t into an * arch-independent swp_entry_t. */ static inline swp_entry_t pte_to_swp_entry(pte_t pte) { swp_entry_t arch_entry; pte = pte_swp_clear_flags(pte); arch_entry = __pte_to_swp_entry(pte); return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); } /* * Convert the arch-independent representation of a swp_entry_t into the * arch-dependent pte representation. 
*/ static inline pte_t swp_entry_to_pte(swp_entry_t entry) { swp_entry_t arch_entry; arch_entry = __swp_entry(swp_type(entry), swp_offset(entry)); return __swp_entry_to_pte(arch_entry); } static inline swp_entry_t radix_to_swp_entry(void *arg) { swp_entry_t entry; entry.val = xa_to_value(arg); return entry; } static inline void *swp_to_radix_entry(swp_entry_t entry) { return xa_mk_value(entry.val); } #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { return swp_entry(SWP_DEVICE_READ, offset); } static inline swp_entry_t make_writable_device_private_entry(pgoff_t offset) { return swp_entry(SWP_DEVICE_WRITE, offset); } static inline bool is_device_private_entry(swp_entry_t entry) { int type = swp_type(entry); return type == SWP_DEVICE_READ || type == SWP_DEVICE_WRITE; } static inline bool is_writable_device_private_entry(swp_entry_t entry) { return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); } static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { return swp_entry(SWP_DEVICE_EXCLUSIVE, offset); } static inline bool is_device_exclusive_entry(swp_entry_t entry) { return swp_type(entry) == SWP_DEVICE_EXCLUSIVE; } #else /* CONFIG_DEVICE_PRIVATE */ static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { return swp_entry(0, 0); } static inline swp_entry_t make_writable_device_private_entry(pgoff_t offset) { return swp_entry(0, 0); } static inline bool is_device_private_entry(swp_entry_t entry) { return false; } static inline bool is_writable_device_private_entry(swp_entry_t entry) { return false; } static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { return swp_entry(0, 0); } static inline bool is_device_exclusive_entry(swp_entry_t entry) { return false; } #endif /* CONFIG_DEVICE_PRIVATE */ #ifdef CONFIG_MIGRATION static inline int is_migration_entry(swp_entry_t entry) { return unlikely(swp_type(entry) == SWP_MIGRATION_READ || swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE || swp_type(entry) == SWP_MIGRATION_WRITE); } static inline int is_writable_migration_entry(swp_entry_t entry) { return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE); } static inline int is_readable_migration_entry(swp_entry_t entry) { return unlikely(swp_type(entry) == SWP_MIGRATION_READ); } static inline int is_readable_exclusive_migration_entry(swp_entry_t entry) { return unlikely(swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE); } static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { return swp_entry(SWP_MIGRATION_READ, offset); } static inline swp_entry_t make_readable_exclusive_migration_entry(pgoff_t offset) { return swp_entry(SWP_MIGRATION_READ_EXCLUSIVE, offset); } static inline swp_entry_t make_writable_migration_entry(pgoff_t offset) { return swp_entry(SWP_MIGRATION_WRITE, offset); } /* * Returns whether the host has large enough swap offset field to support * carrying over pgtable A/D bits for page migrations. The result is * pretty much arch specific. 
*/ static inline bool migration_entry_supports_ad(void) { #ifdef CONFIG_SWAP return swap_migration_ad_supported; #else /* CONFIG_SWAP */ return false; #endif /* CONFIG_SWAP */ } static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) { if (migration_entry_supports_ad()) return swp_entry(swp_type(entry), swp_offset(entry) | SWP_MIG_YOUNG); return entry; } static inline bool is_migration_entry_young(swp_entry_t entry) { if (migration_entry_supports_ad()) return swp_offset(entry) & SWP_MIG_YOUNG; /* Keep the old behavior of aging page after migration */ return false; } static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) { if (migration_entry_supports_ad()) return swp_entry(swp_type(entry), swp_offset(entry) | SWP_MIG_DIRTY); return entry; } static inline bool is_migration_entry_dirty(swp_entry_t entry) { if (migration_entry_supports_ad()) return swp_offset(entry) & SWP_MIG_DIRTY; /* Keep the old behavior of clean page after migration */ return false; } extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); extern void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *pte); #else /* CONFIG_MIGRATION */ static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { return swp_entry(0, 0); } static inline swp_entry_t make_readable_exclusive_migration_entry(pgoff_t offset) { return swp_entry(0, 0); } static inline swp_entry_t make_writable_migration_entry(pgoff_t offset) { return swp_entry(0, 0); } static inline int is_migration_entry(swp_entry_t swp) { return 0; } static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { } static inline int is_writable_migration_entry(swp_entry_t entry) { return 0; } static inline int is_readable_migration_entry(swp_entry_t entry) { return 0; } static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) { return entry; } static inline bool is_migration_entry_young(swp_entry_t entry) { return false; } static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) { return entry; } static inline bool is_migration_entry_dirty(swp_entry_t entry) { return false; } #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_MEMORY_FAILURE /* * Support for hardware poisoned pages */ static inline swp_entry_t make_hwpoison_entry(struct page *page) { BUG_ON(!PageLocked(page)); return swp_entry(SWP_HWPOISON, page_to_pfn(page)); } static inline int is_hwpoison_entry(swp_entry_t entry) { return swp_type(entry) == SWP_HWPOISON; } #else static inline swp_entry_t make_hwpoison_entry(struct page *page) { return swp_entry(0, 0); } static inline int is_hwpoison_entry(swp_entry_t swp) { return 0; } #endif typedef unsigned long pte_marker; #define PTE_MARKER_UFFD_WP BIT(0) /* * "Poisoned" here is meant in the very general sense of "future accesses are * invalid", instead of referring very specifically to hardware memory errors. * This marker is meant to represent any of various different causes of this. * * Note that, when encountered by the faulting logic, PTEs with this marker will * result in VM_FAULT_HWPOISON and thus regardless trigger hardware memory error * logic. */ #define PTE_MARKER_POISONED BIT(1) /* * Indicates that, on fault, this PTE will case a SIGSEGV signal to be * sent. 
This means guard markers behave in effect as if the region were mapped * PROT_NONE, rather than if they were a memory hole or equivalent. */ #define PTE_MARKER_GUARD BIT(2) #define PTE_MARKER_MASK (BIT(3) - 1) static inline swp_entry_t make_pte_marker_entry(pte_marker marker) { return swp_entry(SWP_PTE_MARKER, marker); } static inline bool is_pte_marker_entry(swp_entry_t entry) { return swp_type(entry) == SWP_PTE_MARKER; } static inline pte_marker pte_marker_get(swp_entry_t entry) { return swp_offset(entry) & PTE_MARKER_MASK; } static inline bool is_pte_marker(pte_t pte) { return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte)); } static inline pte_t make_pte_marker(pte_marker marker) { return swp_entry_to_pte(make_pte_marker_entry(marker)); } static inline swp_entry_t make_poisoned_swp_entry(void) { return make_pte_marker_entry(PTE_MARKER_POISONED); } static inline int is_poisoned_swp_entry(swp_entry_t entry) { return is_pte_marker_entry(entry) && (pte_marker_get(entry) & PTE_MARKER_POISONED); } static inline swp_entry_t make_guard_swp_entry(void) { return make_pte_marker_entry(PTE_MARKER_GUARD); } static inline int is_guard_swp_entry(swp_entry_t entry) { return is_pte_marker_entry(entry) && (pte_marker_get(entry) & PTE_MARKER_GUARD); } /* * This is a special version to check pte_none() just to cover the case when * the pte is a pte marker. It existed because in many cases the pte marker * should be seen as a none pte; it's just that we have stored some information * onto the none pte so it becomes not-none any more. * * It should be used when the pte is file-backed, ram-based and backing * userspace pages, like shmem. It is not needed upon pgtables that do not * support pte markers at all. For example, it's not needed on anonymous * memory, kernel-only memory (including when the system is during-boot), * non-ram based generic file-system. It's fine to be used even there, but the * extra pte marker check will be pure overhead. */ static inline int pte_none_mostly(pte_t pte) { return pte_none(pte) || is_pte_marker(pte); } static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) { struct page *p = pfn_to_page(swp_offset_pfn(entry)); /* * Any use of migration entries may only occur while the * corresponding page is locked */ BUG_ON(is_migration_entry(entry) && !PageLocked(p)); return p; } static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry) { struct folio *folio = pfn_folio(swp_offset_pfn(entry)); /* * Any use of migration entries may only occur while the * corresponding folio is locked */ BUG_ON(is_migration_entry(entry) && !folio_test_locked(folio)); return folio; } /* * A pfn swap entry is a special type of swap entry that always has a pfn stored * in the swap offset. They can either be used to represent unaddressable device * memory, to restrict access to a page undergoing migration or to represent a * pfn which has been hwpoisoned and unmapped. 
*/ static inline bool is_pfn_swap_entry(swp_entry_t entry) { /* Make sure the swp offset can always store the needed fields */ BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS); return is_migration_entry(entry) || is_device_private_entry(entry) || is_device_exclusive_entry(entry) || is_hwpoison_entry(entry); } struct page_vma_mapped_walk; #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION extern int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, struct page *page); extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new); extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd); static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) { swp_entry_t arch_entry; if (pmd_swp_soft_dirty(pmd)) pmd = pmd_swp_clear_soft_dirty(pmd); if (pmd_swp_uffd_wp(pmd)) pmd = pmd_swp_clear_uffd_wp(pmd); arch_entry = __pmd_to_swp_entry(pmd); return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); } static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) { swp_entry_t arch_entry; arch_entry = __swp_entry(swp_type(entry), swp_offset(entry)); return __swp_entry_to_pmd(arch_entry); } static inline int is_pmd_migration_entry(pmd_t pmd) { return is_swap_pmd(pmd) && is_migration_entry(pmd_to_swp_entry(pmd)); } #else /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ static inline int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, struct page *page) { BUILD_BUG(); } static inline void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) { BUILD_BUG(); } static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { } static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) { return swp_entry(0, 0); } static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) { return __pmd(0); } static inline int is_pmd_migration_entry(pmd_t pmd) { return 0; } #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES; } #endif /* CONFIG_MMU */ #endif /* _LINUX_SWAPOPS_H */
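The packing scheme described at the top of this header (type in the high bits, offset in the remaining low bits) can be exercised in isolation. The sketch below is a hedged, user-space illustration: the demo_* names and the constant values are stand-ins, not kernel code, and it only shows that the shift/mask round-trip used by swp_entry(), swp_type() and swp_offset() is lossless whenever the offset fits inside the offset mask.

#include <assert.h>
#include <stdio.h>

#define DEMO_TYPE_SHIFT		58UL	/* stand-in for BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT */
#define DEMO_OFFSET_MASK	((1UL << DEMO_TYPE_SHIFT) - 1)

static unsigned long demo_swp_entry(unsigned long type, unsigned long offset)
{
	/* same shape as swp_entry(): type in the high bits, offset below it */
	return (type << DEMO_TYPE_SHIFT) | (offset & DEMO_OFFSET_MASK);
}

static unsigned long demo_swp_type(unsigned long val)
{
	return val >> DEMO_TYPE_SHIFT;
}

static unsigned long demo_swp_offset(unsigned long val)
{
	return val & DEMO_OFFSET_MASK;
}

int main(void)
{
	/* assumes a 64-bit unsigned long, like the kernel definitions above */
	unsigned long e = demo_swp_entry(3, 0x1234);

	assert(demo_swp_type(e) == 3);
	assert(demo_swp_offset(e) == 0x1234);
	printf("entry=%#lx type=%lu offset=%#lx\n",
	       e, demo_swp_type(e), demo_swp_offset(e));
	return 0;
}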
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2012 Regents of the University of California */ #ifndef _ASM_RISCV_PGTABLE_64_H #define _ASM_RISCV_PGTABLE_64_H #include <linux/bits.h> #include <linux/const.h> #include <asm/errata_list.h> extern bool pgtable_l4_enabled; extern bool pgtable_l5_enabled; #define PGDIR_SHIFT_L3 30 #define PGDIR_SHIFT_L4 39 #define PGDIR_SHIFT_L5 48 #define PGDIR_SHIFT (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \ (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)) /* Size of region mapped by a page global directory */ #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* p4d is folded into pgd in case of 4-level page table */ #define P4D_SHIFT_L3 30 #define P4D_SHIFT_L4 39 #define P4D_SHIFT_L5 39 #define P4D_SHIFT (pgtable_l5_enabled ? P4D_SHIFT_L5 : \ (pgtable_l4_enabled ?
P4D_SHIFT_L4 : P4D_SHIFT_L3)) #define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) #define P4D_MASK (~(P4D_SIZE - 1)) /* pud is folded into pgd in case of 3-level page table */ #define PUD_SHIFT 30 #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE - 1)) #define PMD_SHIFT 21 /* Size of region mapped by a page middle directory */ #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE - 1)) /* Page 4th Directory entry */ typedef struct { unsigned long p4d; } p4d_t; #define p4d_val(x) ((x).p4d) #define __p4d(x) ((p4d_t) { (x) }) #define PTRS_PER_P4D (PAGE_SIZE / sizeof(p4d_t)) /* Page Upper Directory entry */ typedef struct { unsigned long pud; } pud_t; #define pud_val(x) ((x).pud) #define __pud(x) ((pud_t) { (x) }) #define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t)) /* Page Middle Directory entry */ typedef struct { unsigned long pmd; } pmd_t; #define pmd_val(x) ((x).pmd) #define __pmd(x) ((pmd_t) { (x) }) #define PTRS_PER_PMD (PAGE_SIZE / sizeof(pmd_t)) /* * rv64 PTE format: * | 63 | 62 61 | 60 54 | 53 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 * N MT RSV PFN reserved for SW D A G U X W R V */ #define _PAGE_PFN_MASK GENMASK(53, 10) /* * [63] Svnapot definitions: * 0 Svnapot disabled * 1 Svnapot enabled */ #define _PAGE_NAPOT_SHIFT 63 #define _PAGE_NAPOT BIT(_PAGE_NAPOT_SHIFT) /* * Only 64KB (order 4) napot ptes supported. */ #define NAPOT_CONT_ORDER_BASE 4 enum napot_cont_order { NAPOT_CONT64KB_ORDER = NAPOT_CONT_ORDER_BASE, NAPOT_ORDER_MAX, }; #define for_each_napot_order(order) \ for (order = NAPOT_CONT_ORDER_BASE; order < NAPOT_ORDER_MAX; order++) #define for_each_napot_order_rev(order) \ for (order = NAPOT_ORDER_MAX - 1; \ order >= NAPOT_CONT_ORDER_BASE; order--) #define napot_cont_order(val) (__builtin_ctzl((val.pte >> _PAGE_PFN_SHIFT) << 1)) #define napot_cont_shift(order) ((order) + PAGE_SHIFT) #define napot_cont_size(order) BIT(napot_cont_shift(order)) #define napot_cont_mask(order) (~(napot_cont_size(order) - 1UL)) #define napot_pte_num(order) BIT(order) #ifdef CONFIG_RISCV_ISA_SVNAPOT #define HUGE_MAX_HSTATE (2 + (NAPOT_ORDER_MAX - NAPOT_CONT_ORDER_BASE)) #else #define HUGE_MAX_HSTATE 2 #endif /* * [62:61] Svpbmt Memory Type definitions: * * 00 - PMA Normal Cacheable, No change to implied PMA memory type * 01 - NC Non-cacheable, idempotent, weakly-ordered Main Memory * 10 - IO Non-cacheable, non-idempotent, strongly-ordered I/O memory * 11 - Rsvd Reserved for future standard use */ #define _PAGE_NOCACHE_SVPBMT (1UL << 61) #define _PAGE_IO_SVPBMT (1UL << 62) #define _PAGE_MTMASK_SVPBMT (_PAGE_NOCACHE_SVPBMT | _PAGE_IO_SVPBMT) /* * [63:59] T-Head Memory Type definitions: * bit[63] SO - Strong Order * bit[62] C - Cacheable * bit[61] B - Bufferable * bit[60] SH - Shareable * bit[59] Sec - Trustable * 00110 - NC Weakly-ordered, Non-cacheable, Bufferable, Shareable, Non-trustable * 01110 - PMA Weakly-ordered, Cacheable, Bufferable, Shareable, Non-trustable * 10010 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Shareable, Non-trustable */ #define _PAGE_PMA_THEAD ((1UL << 62) | (1UL << 61) | (1UL << 60)) #define _PAGE_NOCACHE_THEAD ((1UL << 61) | (1UL << 60)) #define _PAGE_IO_THEAD ((1UL << 63) | (1UL << 60)) #define _PAGE_MTMASK_THEAD (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59)) static inline u64 riscv_page_mtmask(void) { u64 val; ALT_SVPBMT(val, _PAGE_MTMASK); return val; } static inline u64 riscv_page_nocache(void) { u64 val; ALT_SVPBMT(val, _PAGE_NOCACHE); return val; } static inline u64 riscv_page_io(void) { u64 val; ALT_SVPBMT(val, _PAGE_IO); return 
val; } #define _PAGE_NOCACHE riscv_page_nocache() #define _PAGE_IO riscv_page_io() #define _PAGE_MTMASK riscv_page_mtmask() /* Set of bits to preserve across pte_modify() */ #define _PAGE_CHG_MASK (~(unsigned long)(_PAGE_PRESENT | _PAGE_READ | \ _PAGE_WRITE | _PAGE_EXEC | \ _PAGE_USER | _PAGE_GLOBAL | \ _PAGE_MTMASK)) static inline int pud_present(pud_t pud) { return (pud_val(pud) & _PAGE_PRESENT); } static inline int pud_none(pud_t pud) { return (pud_val(pud) == 0); } static inline int pud_bad(pud_t pud) { return !pud_present(pud) || (pud_val(pud) & _PAGE_LEAF); } #define pud_leaf pud_leaf static inline bool pud_leaf(pud_t pud) { return pud_present(pud) && (pud_val(pud) & _PAGE_LEAF); } static inline int pud_user(pud_t pud) { return pud_val(pud) & _PAGE_USER; } static inline void set_pud(pud_t *pudp, pud_t pud) { WRITE_ONCE(*pudp, pud); } static inline void pud_clear(pud_t *pudp) { set_pud(pudp, __pud(0)); } static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot) { return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); } static inline unsigned long _pud_pfn(pud_t pud) { return __page_val_to_pfn(pud_val(pud)); } static inline pmd_t *pud_pgtable(pud_t pud) { return (pmd_t *)pfn_to_virt(__page_val_to_pfn(pud_val(pud))); } static inline struct page *pud_page(pud_t pud) { return pfn_to_page(__page_val_to_pfn(pud_val(pud))); } #define mm_p4d_folded mm_p4d_folded static inline bool mm_p4d_folded(struct mm_struct *mm) { if (pgtable_l5_enabled) return false; return true; } #define mm_pud_folded mm_pud_folded static inline bool mm_pud_folded(struct mm_struct *mm) { if (pgtable_l4_enabled) return false; return true; } #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot) { unsigned long prot_val = pgprot_val(prot); ALT_THEAD_PMA(prot_val); return __pmd((pfn << _PAGE_PFN_SHIFT) | prot_val); } static inline unsigned long _pmd_pfn(pmd_t pmd) { return __page_val_to_pfn(pmd_val(pmd)); } #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_ERROR(e) \ pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) #define p4d_ERROR(e) \ pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e)) static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { if (pgtable_l4_enabled) WRITE_ONCE(*p4dp, p4d); else set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) }); } static inline int p4d_none(p4d_t p4d) { if (pgtable_l4_enabled) return (p4d_val(p4d) == 0); return 0; } static inline int p4d_present(p4d_t p4d) { if (pgtable_l4_enabled) return (p4d_val(p4d) & _PAGE_PRESENT); return 1; } static inline int p4d_bad(p4d_t p4d) { if (pgtable_l4_enabled) return !p4d_present(p4d); return 0; } static inline void p4d_clear(p4d_t *p4d) { if (pgtable_l4_enabled) set_p4d(p4d, __p4d(0)); } static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot) { return __p4d((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); } static inline unsigned long _p4d_pfn(p4d_t p4d) { return __page_val_to_pfn(p4d_val(p4d)); } static inline pud_t *p4d_pgtable(p4d_t p4d) { if (pgtable_l4_enabled) return (pud_t *)pfn_to_virt(__page_val_to_pfn(p4d_val(p4d))); return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) }); } #define p4d_page_vaddr(p4d) ((unsigned long)p4d_pgtable(p4d)) static inline struct page *p4d_page(p4d_t p4d) { return pfn_to_page(__page_val_to_pfn(p4d_val(p4d))); } #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) #define pud_offset pud_offset pud_t *pud_offset(p4d_t *p4d, unsigned long 
address); static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { if (pgtable_l5_enabled) WRITE_ONCE(*pgdp, pgd); else set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) }); } static inline int pgd_none(pgd_t pgd) { if (pgtable_l5_enabled) return (pgd_val(pgd) == 0); return 0; } static inline int pgd_present(pgd_t pgd) { if (pgtable_l5_enabled) return (pgd_val(pgd) & _PAGE_PRESENT); return 1; } static inline int pgd_bad(pgd_t pgd) { if (pgtable_l5_enabled) return !pgd_present(pgd); return 0; } static inline void pgd_clear(pgd_t *pgd) { if (pgtable_l5_enabled) set_pgd(pgd, __pgd(0)); } static inline p4d_t *pgd_pgtable(pgd_t pgd) { if (pgtable_l5_enabled) return (p4d_t *)pfn_to_virt(__page_val_to_pfn(pgd_val(pgd))); return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) }); } #define pgd_page_vaddr(pgd) ((unsigned long)pgd_pgtable(pgd)) static inline struct page *pgd_page(pgd_t pgd) { return pfn_to_page(__page_val_to_pfn(pgd_val(pgd))); } #define pgd_page(pgd) pgd_page(pgd) #define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) #define p4d_offset p4d_offset p4d_t *p4d_offset(pgd_t *pgd, unsigned long address); #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int pte_devmap(pte_t pte); static inline pte_t pmd_pte(pmd_t pmd); static inline pte_t pud_pte(pud_t pud); static inline int pmd_devmap(pmd_t pmd) { return pte_devmap(pmd_pte(pmd)); } static inline int pud_devmap(pud_t pud) { return pte_devmap(pud_pte(pud)); } static inline int pgd_devmap(pgd_t pgd) { return 0; } #endif #endif /* _ASM_RISCV_PGTABLE_64_H */
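PGDIR_SHIFT above is selected at runtime from pgtable_l5_enabled and pgtable_l4_enabled, which is what lets a single kernel image support Sv39, Sv48 and Sv57 paging. The stand-alone sketch below (demo_* names and values are assumptions, not kernel API) makes the consequence concrete: one page global directory entry covers 1 GiB, 512 GiB or 256 TiB depending on the selected mode.

#include <stdbool.h>
#include <stdio.h>

#define DEMO_PGDIR_SHIFT_L3	30	/* Sv39: three-level paging */
#define DEMO_PGDIR_SHIFT_L4	39	/* Sv48: four-level paging */
#define DEMO_PGDIR_SHIFT_L5	48	/* Sv57: five-level paging */

static unsigned int demo_pgdir_shift(bool l4_enabled, bool l5_enabled)
{
	/* mirrors the PGDIR_SHIFT selection in the header above */
	if (l5_enabled)
		return DEMO_PGDIR_SHIFT_L5;
	return l4_enabled ? DEMO_PGDIR_SHIFT_L4 : DEMO_PGDIR_SHIFT_L3;
}

int main(void)
{
	/* region covered by one PGD entry: 1 GiB, 512 GiB or 262144 GiB (256 TiB) */
	printf("Sv39: %llu GiB per PGD entry\n",
	       (1ULL << demo_pgdir_shift(false, false)) >> 30);
	printf("Sv48: %llu GiB per PGD entry\n",
	       (1ULL << demo_pgdir_shift(true, false)) >> 30);
	printf("Sv57: %llu GiB per PGD entry\n",
	       (1ULL << demo_pgdir_shift(true, true)) >> 30);
	return 0;
}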
/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/can/dev.h * * Definitions for the CAN network device driver interface * * Copyright (C) 2006 Andrey Volkov <avolkov@varma-el.com> * Varma Electronics Oy * * Copyright (C) 2008 Wolfgang Grandegger <wg@grandegger.com> * */ #ifndef _CAN_DEV_H #define _CAN_DEV_H #include <linux/can.h> #include <linux/can/bittiming.h> #include <linux/can/error.h> #include <linux/can/length.h> #include <linux/can/netlink.h> #include <linux/can/skb.h> #include <linux/ethtool.h> #include <linux/netdevice.h> /* * CAN mode */ enum can_mode { CAN_MODE_STOP = 0, CAN_MODE_START, CAN_MODE_SLEEP }; enum can_termination_gpio { CAN_TERMINATION_GPIO_DISABLED = 0, CAN_TERMINATION_GPIO_ENABLED, CAN_TERMINATION_GPIO_MAX, }; struct data_bittiming_params { const struct can_bittiming_const *data_bittiming_const; struct can_bittiming data_bittiming; const struct can_tdc_const *tdc_const; struct can_tdc tdc; const u32 *data_bitrate_const; unsigned int data_bitrate_const_cnt; int (*do_set_data_bittiming)(struct net_device *dev); int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv); }; /* * CAN common private data */ struct can_priv { struct net_device *dev; struct can_device_stats can_stats; const struct can_bittiming_const *bittiming_const; struct can_bittiming bittiming; struct data_bittiming_params fd; unsigned int bitrate_const_cnt; const u32 *bitrate_const; u32 bitrate_max; struct can_clock clock; unsigned int termination_const_cnt; const u16 *termination_const; u16 termination; struct gpio_desc *termination_gpio; u16 termination_gpio_ohms[CAN_TERMINATION_GPIO_MAX]; unsigned int echo_skb_max; struct sk_buff **echo_skb; enum can_state state; /* CAN controller features - see include/uapi/linux/can/netlink.h */ u32 ctrlmode; /* current options setting */ u32 ctrlmode_supported; /* options that can be modified by netlink */ int restart_ms; struct delayed_work restart_work; int (*do_set_bittiming)(struct net_device *dev); int (*do_set_mode)(struct net_device *dev, enum can_mode mode); int (*do_set_termination)(struct net_device *dev, u16 term); int (*do_get_state)(const struct net_device *dev, enum can_state *state); int (*do_get_berr_counter)(const struct net_device *dev, struct can_berr_counter *bec); }; static inline bool can_tdc_is_enabled(const struct can_priv *priv) { return !!(priv->ctrlmode & CAN_CTRLMODE_TDC_MASK); } /* * can_get_relative_tdco() - TDCO relative to the sample point * * struct can_tdc::tdco represents the absolute offset from TDCV.
Some * controllers use instead an offset relative to the Sample Point (SP) * such that: * * SSP = TDCV + absolute TDCO * = TDCV + SP + relative TDCO * * -+----------- one bit ----------+-- TX pin * |<--- Sample Point --->| * * --+----------- one bit ----------+-- RX pin * |<-------- TDCV -------->| * |<------------------------>| absolute TDCO * |<--- Sample Point --->| * | |<->| relative TDCO * |<------------- Secondary Sample Point ------------>| */ static inline s32 can_get_relative_tdco(const struct can_priv *priv) { const struct can_bittiming *dbt = &priv->fd.data_bittiming; s32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg + dbt->phase_seg1) * dbt->brp; return (s32)priv->fd.tdc.tdco - sample_point_in_tc; } /* helper to define static CAN controller features at device creation time */ static inline int __must_check can_set_static_ctrlmode(struct net_device *dev, u32 static_mode) { struct can_priv *priv = netdev_priv(dev); /* alloc_candev() succeeded => netdev_priv() is valid at this point */ if (priv->ctrlmode_supported & static_mode) { netdev_warn(dev, "Controller features can not be supported and static at the same time\n"); return -EINVAL; } priv->ctrlmode = static_mode; /* override MTU which was set by default in can_setup()? */ if (static_mode & CAN_CTRLMODE_FD) dev->mtu = CANFD_MTU; return 0; } static inline u32 can_get_static_ctrlmode(struct can_priv *priv) { return priv->ctrlmode & ~priv->ctrlmode_supported; } static inline bool can_is_canxl_dev_mtu(unsigned int mtu) { return (mtu >= CANXL_MIN_MTU && mtu <= CANXL_MAX_MTU); } /* drop skb if it does not contain a valid CAN frame for sending */ static inline bool can_dev_dropped_skb(struct net_device *dev, struct sk_buff *skb) { struct can_priv *priv = netdev_priv(dev); if (priv->ctrlmode & CAN_CTRLMODE_LISTENONLY) { netdev_info_once(dev, "interface in listen only mode, dropping skb\n"); kfree_skb(skb); dev->stats.tx_dropped++; return true; } return can_dropped_invalid_skb(dev, skb); } void can_setup(struct net_device *dev); struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, unsigned int txqs, unsigned int rxqs); #define alloc_candev(sizeof_priv, echo_skb_max) \ alloc_candev_mqs(sizeof_priv, echo_skb_max, 1, 1) #define alloc_candev_mq(sizeof_priv, echo_skb_max, count) \ alloc_candev_mqs(sizeof_priv, echo_skb_max, count, count) void free_candev(struct net_device *dev); /* a candev safe wrapper around netdev_priv */ struct can_priv *safe_candev_priv(struct net_device *dev); int open_candev(struct net_device *dev); void close_candev(struct net_device *dev); int can_change_mtu(struct net_device *dev, int new_mtu); int can_eth_ioctl_hwts(struct net_device *netdev, struct ifreq *ifr, int cmd); int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, struct kernel_ethtool_ts_info *info); int register_candev(struct net_device *dev); void unregister_candev(struct net_device *dev); int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); const char *can_get_state_str(const enum can_state state); void can_state_get_by_berr_counter(const struct net_device *dev, const struct can_berr_counter *bec, enum can_state *tx_state, enum can_state *rx_state); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state); #ifdef CONFIG_OF void of_can_transceiver(struct net_device *dev); #else static inline void of_can_transceiver(struct net_device *dev) { } #endif extern struct rtnl_link_ops can_link_ops; int 
can_netlink_register(void); void can_netlink_unregister(void); #endif /* !_CAN_DEV_H */
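The ASCII diagram above can_get_relative_tdco() relates the secondary sample point (SSP) to TDCV plus either an absolute or a relative TDC offset. The following stand-alone sketch (made-up bittiming numbers, demo names only) runs the same arithmetic as the helper: the sample point expressed in minimum time quanta is subtracted from the absolute TDCO to obtain the relative one, which can legitimately come out negative.

#include <stdio.h>

#define DEMO_CAN_SYNC_SEG	1	/* the sync segment is always one time quantum */

int main(void)
{
	/* made-up data-phase bittiming, expressed in time quanta (tq) */
	unsigned int prop_seg = 3, phase_seg1 = 4, brp = 1;
	int tdco = 14;			/* absolute TDC offset, in minimum time quanta */

	/* sample point position in minimum time quanta, as in can_get_relative_tdco() */
	int sample_point_in_tc = (DEMO_CAN_SYNC_SEG + prop_seg + phase_seg1) * brp;
	int relative_tdco = tdco - sample_point_in_tc;

	printf("SP = %d tc, absolute TDCO = %d, relative TDCO = %d\n",
	       sample_point_in_tc, tdco, relative_tdco);
	return 0;
}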
// SPDX-License-Identifier: GPL-2.0-or-later /* * Virtio PCI driver - common functionality for all device versions * * This module allows virtio devices to be used over a virtual PCI device.
* This can be used with QEMU based VMMs like KVM or Xen. * * Copyright IBM Corp. 2007 * Copyright Red Hat, Inc. 2014 * * Authors: * Anthony Liguori <aliguori@us.ibm.com> * Rusty Russell <rusty@rustcorp.com.au> * Michael S. Tsirkin <mst@redhat.com> */ #include "virtio_pci_common.h" static bool force_legacy = false; #if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY) module_param(force_legacy, bool, 0444); MODULE_PARM_DESC(force_legacy, "Force legacy mode for transitional virtio 1 devices"); #endif bool vp_is_avq(struct virtio_device *vdev, unsigned int index) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) return false; return index == vp_dev->admin_vq.vq_index; } /* wait for pending irq handlers */ void vp_synchronize_vectors(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; if (vp_dev->intx_enabled) synchronize_irq(vp_dev->pci_dev->irq); for (i = 0; i < vp_dev->msix_vectors; ++i) synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); } /* the notify function used when creating a virt queue */ bool vp_notify(struct virtqueue *vq) { /* we write the queue's selector into the notification register to * signal the other end */ iowrite16(vq->index, (void __iomem *)vq->priv); return true; } /* Notify all slow path virtqueues on an interrupt. */ static void vp_vring_slow_path_interrupt(int irq, struct virtio_pci_device *vp_dev) { struct virtio_pci_vq_info *info; unsigned long flags; spin_lock_irqsave(&vp_dev->lock, flags); list_for_each_entry(info, &vp_dev->slow_virtqueues, node) vring_interrupt(irq, info->vq); spin_unlock_irqrestore(&vp_dev->lock, flags); } /* Handle a configuration change: Tell driver if it wants to know. */ static irqreturn_t vp_config_changed(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; virtio_config_changed(&vp_dev->vdev); vp_vring_slow_path_interrupt(irq, vp_dev); return IRQ_HANDLED; } /* Notify all virtqueues on an interrupt. */ static irqreturn_t vp_vring_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; struct virtio_pci_vq_info *info; irqreturn_t ret = IRQ_NONE; unsigned long flags; spin_lock_irqsave(&vp_dev->lock, flags); list_for_each_entry(info, &vp_dev->virtqueues, node) { if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) ret = IRQ_HANDLED; } spin_unlock_irqrestore(&vp_dev->lock, flags); return ret; } /* A small wrapper to also acknowledge the interrupt when it's handled. * I really need an EIO hook for the vring so I can ack the interrupt once we * know that we'll be handling the IRQ but before we invoke the callback since * the callback may notify the host which results in the host attempting to * raise an interrupt that we would then mask once we acknowledged the * interrupt. */ static irqreturn_t vp_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; u8 isr; /* reading the ISR has the effect of also clearing it so it's very * important to save off the value. */ isr = ioread8(vp_dev->isr); /* It's definitely not us if the ISR was not high */ if (!isr) return IRQ_NONE; /* Configuration change? Tell driver if it wants to know. 
*/ if (isr & VIRTIO_PCI_ISR_CONFIG) vp_config_changed(irq, opaque); return vp_vring_interrupt(irq, opaque); } static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, bool per_vq_vectors, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); unsigned int flags = PCI_IRQ_MSIX; unsigned int i, v; int err = -ENOMEM; vp_dev->msix_vectors = nvectors; vp_dev->msix_names = kmalloc_array(nvectors, sizeof(*vp_dev->msix_names), GFP_KERNEL); if (!vp_dev->msix_names) goto error; vp_dev->msix_affinity_masks = kcalloc(nvectors, sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL); if (!vp_dev->msix_affinity_masks) goto error; for (i = 0; i < nvectors; ++i) if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], GFP_KERNEL)) goto error; if (!per_vq_vectors) desc = NULL; if (desc) { flags |= PCI_IRQ_AFFINITY; desc->pre_vectors++; /* virtio config vector */ } err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, nvectors, flags, desc); if (err < 0) goto error; vp_dev->msix_enabled = 1; /* Set the vector used for configuration */ v = vp_dev->msix_used_vectors; snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-config", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), vp_config_changed, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; ++vp_dev->msix_used_vectors; v = vp_dev->config_vector(vp_dev, v); /* Verify we had enough resources to assign the vector */ if (v == VIRTIO_MSI_NO_VECTOR) { err = -EBUSY; goto error; } if (!per_vq_vectors) { /* Shared vector for all VQs */ v = vp_dev->msix_used_vectors; snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-virtqueues", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), vp_vring_interrupt, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; ++vp_dev->msix_used_vectors; } return 0; error: return err; } static bool vp_is_slow_path_vector(u16 msix_vec) { return msix_vec == VP_MSIX_CONFIG_VECTOR; } static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx, u16 msix_vec, struct virtio_pci_vq_info **p_info) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); struct virtqueue *vq; unsigned long flags; /* fill out our structure that represents an active queue */ if (!info) return ERR_PTR(-ENOMEM); vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx, msix_vec); if (IS_ERR(vq)) goto out_info; info->vq = vq; if (callback) { spin_lock_irqsave(&vp_dev->lock, flags); if (!vp_is_slow_path_vector(msix_vec)) list_add(&info->node, &vp_dev->virtqueues); else list_add(&info->node, &vp_dev->slow_virtqueues); spin_unlock_irqrestore(&vp_dev->lock, flags); } else { INIT_LIST_HEAD(&info->node); } *p_info = info; return vq; out_info: kfree(info); return vq; } static void vp_del_vq(struct virtqueue *vq, struct virtio_pci_vq_info *info) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); unsigned long flags; /* * If it fails during re-enable reset vq. This way we won't rejoin * info->node to the queue. Prevent unexpected irqs. 
*/ if (!vq->reset) { spin_lock_irqsave(&vp_dev->lock, flags); list_del(&info->node); spin_unlock_irqrestore(&vp_dev->lock, flags); } vp_dev->del_vq(info); kfree(info); } /* the config->del_vqs() implementation */ void vp_del_vqs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info; struct virtqueue *vq, *n; int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { info = vp_is_avq(vdev, vq->index) ? vp_dev->admin_vq.info : vp_dev->vqs[vq->index]; if (vp_dev->per_vq_vectors) { int v = info->msix_vector; if (v != VIRTIO_MSI_NO_VECTOR && !vp_is_slow_path_vector(v)) { int irq = pci_irq_vector(vp_dev->pci_dev, v); irq_update_affinity_hint(irq, NULL); free_irq(irq, vq); } } vp_del_vq(vq, info); } vp_dev->per_vq_vectors = false; if (vp_dev->intx_enabled) { free_irq(vp_dev->pci_dev->irq, vp_dev); vp_dev->intx_enabled = 0; } for (i = 0; i < vp_dev->msix_used_vectors; ++i) free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); if (vp_dev->msix_affinity_masks) { for (i = 0; i < vp_dev->msix_vectors; i++) free_cpumask_var(vp_dev->msix_affinity_masks[i]); } if (vp_dev->msix_enabled) { /* Disable the vector used for configuration */ vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); pci_free_irq_vectors(vp_dev->pci_dev); vp_dev->msix_enabled = 0; } vp_dev->msix_vectors = 0; vp_dev->msix_used_vectors = 0; kfree(vp_dev->msix_names); vp_dev->msix_names = NULL; kfree(vp_dev->msix_affinity_masks); vp_dev->msix_affinity_masks = NULL; kfree(vp_dev->vqs); vp_dev->vqs = NULL; } enum vp_vq_vector_policy { VP_VQ_VECTOR_POLICY_EACH, VP_VQ_VECTOR_POLICY_SHARED_SLOW, VP_VQ_VECTOR_POLICY_SHARED, }; static struct virtqueue * vp_find_one_vq_msix(struct virtio_device *vdev, int queue_idx, vq_callback_t *callback, const char *name, bool ctx, bool slow_path, int *allocated_vectors, enum vp_vq_vector_policy vector_policy, struct virtio_pci_vq_info **p_info) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; u16 msix_vec; int err; if (!callback) msix_vec = VIRTIO_MSI_NO_VECTOR; else if (vector_policy == VP_VQ_VECTOR_POLICY_EACH || (vector_policy == VP_VQ_VECTOR_POLICY_SHARED_SLOW && !slow_path)) msix_vec = (*allocated_vectors)++; else if (vector_policy != VP_VQ_VECTOR_POLICY_EACH && slow_path) msix_vec = VP_MSIX_CONFIG_VECTOR; else msix_vec = VP_MSIX_VQ_VECTOR; vq = vp_setup_vq(vdev, queue_idx, callback, name, ctx, msix_vec, p_info); if (IS_ERR(vq)) return vq; if (vector_policy == VP_VQ_VECTOR_POLICY_SHARED || msix_vec == VIRTIO_MSI_NO_VECTOR || vp_is_slow_path_vector(msix_vec)) return vq; /* allocate per-vq irq if available and necessary */ snprintf(vp_dev->msix_names[msix_vec], sizeof(*vp_dev->msix_names), "%s-%s", dev_name(&vp_dev->vdev.dev), name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), vring_interrupt, 0, vp_dev->msix_names[msix_vec], vq); if (err) { vp_del_vq(vq, *p_info); return ERR_PTR(err); } return vq; } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[], enum vp_vq_vector_policy vector_policy, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; struct virtqueue_info *vqi; int i, err, nvectors, allocated_vectors, queue_idx = 0; struct virtqueue *vq; bool per_vq_vectors; u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; if (vp_dev->avq_index) { err = vp_dev->avq_index(vdev, 
&avq->vq_index, &avq_num); if (err) goto error_find; } per_vq_vectors = vector_policy != VP_VQ_VECTOR_POLICY_SHARED; if (per_vq_vectors) { /* Best option: one for change interrupt, one per vq. */ nvectors = 1; for (i = 0; i < nvqs; ++i) { vqi = &vqs_info[i]; if (vqi->name && vqi->callback) ++nvectors; } if (avq_num && vector_policy == VP_VQ_VECTOR_POLICY_EACH) ++nvectors; } else { /* Second best: one for change, shared for all vqs. */ nvectors = 2; } err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors, desc); if (err) goto error_find; vp_dev->per_vq_vectors = per_vq_vectors; allocated_vectors = vp_dev->msix_used_vectors; for (i = 0; i < nvqs; ++i) { vqi = &vqs_info[i]; if (!vqi->name) { vqs[i] = NULL; continue; } vqs[i] = vp_find_one_vq_msix(vdev, queue_idx++, vqi->callback, vqi->name, vqi->ctx, false, &allocated_vectors, vector_policy, &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; } } if (!avq_num) return 0; sprintf(avq->name, "avq.%u", avq->vq_index); vq = vp_find_one_vq_msix(vdev, avq->vq_index, vp_modern_avq_done, avq->name, false, true, &allocated_vectors, vector_policy, &vp_dev->admin_vq.info); if (IS_ERR(vq)) { err = PTR_ERR(vq); goto error_find; } return 0; error_find: vp_del_vqs(vdev); return err; } static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; int i, err, queue_idx = 0; struct virtqueue *vq; u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; if (vp_dev->avq_index) { err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); if (err) goto out_del_vqs; } err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) goto out_del_vqs; vp_dev->intx_enabled = 1; vp_dev->per_vq_vectors = false; for (i = 0; i < nvqs; ++i) { struct virtqueue_info *vqi = &vqs_info[i]; if (!vqi->name) { vqs[i] = NULL; continue; } vqs[i] = vp_setup_vq(vdev, queue_idx++, vqi->callback, vqi->name, vqi->ctx, VIRTIO_MSI_NO_VECTOR, &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto out_del_vqs; } } if (!avq_num) return 0; sprintf(avq->name, "avq.%u", avq->vq_index); vq = vp_setup_vq(vdev, queue_idx++, vp_modern_avq_done, avq->name, false, VIRTIO_MSI_NO_VECTOR, &vp_dev->admin_vq.info); if (IS_ERR(vq)) { err = PTR_ERR(vq); goto out_del_vqs; } return 0; out_del_vqs: vp_del_vqs(vdev); return err; } /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[], struct irq_affinity *desc) { int err; /* Try MSI-X with one vector per queue. */ err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, VP_VQ_VECTOR_POLICY_EACH, desc); if (!err) return 0; /* Fallback: MSI-X with one shared vector for config and * slow path queues, one vector per queue for the rest. */ err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, VP_VQ_VECTOR_POLICY_SHARED_SLOW, desc); if (!err) return 0; /* Fallback: MSI-X with one vector for config, one shared for queues. */ err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, VP_VQ_VECTOR_POLICY_SHARED, desc); if (!err) return 0; /* Is there an interrupt? If not give up. */ if (!(to_vp_device(vdev)->pci_dev->irq)) return err; /* Finally fall back to regular interrupts. 
*/ return vp_find_vqs_intx(vdev, nvqs, vqs, vqs_info); } const char *vp_bus_name(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); return pci_name(vp_dev->pci_dev); } /* Setup the affinity for a virtqueue: * - force the affinity for per vq vector * - OR over all affinities for shared MSI * - ignore the affinity request if we're using INTX */ int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask) { struct virtio_device *vdev = vq->vdev; struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; struct cpumask *mask; unsigned int irq; if (!vq->callback) return -EINVAL; if (vp_dev->msix_enabled) { mask = vp_dev->msix_affinity_masks[info->msix_vector]; irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); if (!cpu_mask) irq_update_affinity_hint(irq, NULL); else { cpumask_copy(mask, cpu_mask); irq_set_affinity_and_hint(irq, mask); } } return 0; } const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); if (!vp_dev->per_vq_vectors || vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR || vp_is_slow_path_vector(vp_dev->vqs[index]->msix_vector)) return NULL; return pci_irq_get_affinity(vp_dev->pci_dev, vp_dev->vqs[index]->msix_vector); } #ifdef CONFIG_PM_SLEEP static int virtio_pci_freeze(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret; ret = virtio_device_freeze(&vp_dev->vdev); if (!ret) pci_disable_device(pci_dev); return ret; } static int virtio_pci_restore(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret; ret = pci_enable_device(pci_dev); if (ret) return ret; pci_set_master(pci_dev); return virtio_device_restore(&vp_dev->vdev); } static bool vp_supports_pm_no_reset(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); u16 pmcsr; if (!pci_dev->pm_cap) return false; pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, &pmcsr); if (PCI_POSSIBLE_ERROR(pmcsr)) { dev_err(dev, "Unable to query pmcsr"); return false; } return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET; } static int virtio_pci_suspend(struct device *dev) { return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev); } static int virtio_pci_resume(struct device *dev) { return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_restore(dev); } static const struct dev_pm_ops virtio_pci_pm_ops = { .suspend = virtio_pci_suspend, .resume = virtio_pci_resume, .freeze = virtio_pci_freeze, .thaw = virtio_pci_restore, .poweroff = virtio_pci_freeze, .restore = virtio_pci_restore, }; #endif /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ static const struct pci_device_id virtio_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) }, { 0 } }; MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); static void virtio_pci_release_dev(struct device *_d) { struct virtio_device *vdev = dev_to_virtio(_d); struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* As struct device is a kobject, it's not safe to * free the memory (including the reference counter itself) * until it's release callback. 
*/ kfree(vp_dev); } static int virtio_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) { struct virtio_pci_device *vp_dev, *reg_dev = NULL; int rc; /* allocate our structure and fill it out */ vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL); if (!vp_dev) return -ENOMEM; pci_set_drvdata(pci_dev, vp_dev); vp_dev->vdev.dev.parent = &pci_dev->dev; vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->pci_dev = pci_dev; INIT_LIST_HEAD(&vp_dev->virtqueues); INIT_LIST_HEAD(&vp_dev->slow_virtqueues); spin_lock_init(&vp_dev->lock); /* enable the device */ rc = pci_enable_device(pci_dev); if (rc) goto err_enable_device; if (force_legacy) { rc = virtio_pci_legacy_probe(vp_dev); /* Also try modern mode if we can't map BAR0 (no IO space). */ if (rc == -ENODEV || rc == -ENOMEM) rc = virtio_pci_modern_probe(vp_dev); if (rc) goto err_probe; } else { rc = virtio_pci_modern_probe(vp_dev); if (rc == -ENODEV) rc = virtio_pci_legacy_probe(vp_dev); if (rc) goto err_probe; } pci_set_master(pci_dev); rc = register_virtio_device(&vp_dev->vdev); reg_dev = vp_dev; if (rc) goto err_register; return 0; err_register: if (vp_dev->is_legacy) virtio_pci_legacy_remove(vp_dev); else virtio_pci_modern_remove(vp_dev); err_probe: pci_disable_device(pci_dev); err_enable_device: if (reg_dev) put_device(&vp_dev->vdev.dev); else kfree(vp_dev); return rc; } static void virtio_pci_remove(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct device *dev = get_device(&vp_dev->vdev.dev); /* * Device is marked broken on surprise removal so that virtio upper * layers can abort any ongoing operation. */ if (!pci_device_is_present(pci_dev)) virtio_break_device(&vp_dev->vdev); pci_disable_sriov(pci_dev); unregister_virtio_device(&vp_dev->vdev); if (vp_dev->is_legacy) virtio_pci_legacy_remove(vp_dev); else virtio_pci_modern_remove(vp_dev); pci_disable_device(pci_dev); put_device(dev); } static int virtio_pci_sriov_configure(struct pci_dev *pci_dev, int num_vfs) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct virtio_device *vdev = &vp_dev->vdev; int ret; if (!(vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_DRIVER_OK)) return -EBUSY; if (!__virtio_test_bit(vdev, VIRTIO_F_SR_IOV)) return -EINVAL; if (pci_vfs_assigned(pci_dev)) return -EPERM; if (num_vfs == 0) { pci_disable_sriov(pci_dev); return 0; } ret = pci_enable_sriov(pci_dev, num_vfs); if (ret < 0) return ret; return num_vfs; } static void virtio_pci_reset_prepare(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret = 0; ret = virtio_device_reset_prepare(&vp_dev->vdev); if (ret) { if (ret != -EOPNOTSUPP) dev_warn(&pci_dev->dev, "Reset prepare failure: %d", ret); return; } if (pci_is_enabled(pci_dev)) pci_disable_device(pci_dev); } static void virtio_pci_reset_done(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret; if (pci_is_enabled(pci_dev)) return; ret = pci_enable_device(pci_dev); if (!ret) { pci_set_master(pci_dev); ret = virtio_device_reset_done(&vp_dev->vdev); } if (ret && ret != -EOPNOTSUPP) dev_warn(&pci_dev->dev, "Reset done failure: %d", ret); } static const struct pci_error_handlers virtio_pci_err_handler = { .reset_prepare = virtio_pci_reset_prepare, .reset_done = virtio_pci_reset_done, }; static struct pci_driver virtio_pci_driver = { .name = "virtio-pci", .id_table = virtio_pci_id_table, .probe = virtio_pci_probe, .remove = virtio_pci_remove, #ifdef CONFIG_PM_SLEEP .driver.pm = 
&virtio_pci_pm_ops, #endif .sriov_configure = virtio_pci_sriov_configure, .err_handler = &virtio_pci_err_handler, }; struct virtio_device *virtio_pci_vf_get_pf_dev(struct pci_dev *pdev) { struct virtio_pci_device *pf_vp_dev; pf_vp_dev = pci_iov_get_pf_drvdata(pdev, &virtio_pci_driver); if (IS_ERR(pf_vp_dev)) return NULL; return &pf_vp_dev->vdev; } module_pci_driver(virtio_pci_driver); MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>"); MODULE_DESCRIPTION("virtio-pci"); MODULE_LICENSE("GPL"); MODULE_VERSION("1");
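vp_find_vqs() above walks a fixed fallback ladder: per-queue MSI-X vectors first, then one vector shared by config and slow-path queues, then a single vector shared by all queues, and finally the legacy INTx line. The sketch below only illustrates that try-in-order pattern with invented demo_* names and a fake allocator; it is an assumption-laden analogy, not the driver's PCI or virtio code.

#include <stdio.h>

enum demo_policy {
	DEMO_VECTOR_PER_VQ,	/* one MSI-X vector per queue, plus one for config */
	DEMO_SHARED_SLOW,	/* config + slow-path queues share one vector */
	DEMO_SHARED,		/* one vector for config, one shared by all queues */
	DEMO_INTX,		/* legacy shared interrupt line */
};

/* Pretend only the fully shared MSI-X layout can be allocated. */
static int demo_try_setup(enum demo_policy policy)
{
	return policy == DEMO_SHARED ? 0 : -1;
}

int main(void)
{
	static const enum demo_policy ladder[] = {
		DEMO_VECTOR_PER_VQ, DEMO_SHARED_SLOW, DEMO_SHARED, DEMO_INTX,
	};
	unsigned int i;

	/* stop at the first policy the (pretend) hardware accepts */
	for (i = 0; i < sizeof(ladder) / sizeof(ladder[0]); i++) {
		if (!demo_try_setup(ladder[i])) {
			printf("policy %u succeeded\n", i);
			return 0;
		}
	}
	printf("no interrupt source available\n");
	return 1;
}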
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2022 Pablo Neira Ayuso <pablo@netfilter.org> */ #include <linux/kernel.h> #include <linux/if_vlan.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_meta.h> #include <net/netfilter/nf_tables_offload.h> #include <linux/tcp.h> #include <linux/udp.h> #include <net/gre.h> #include <net/geneve.h> #include <net/ip.h> #include <linux/icmpv6.h> #include <linux/ip.h> #include <linux/ipv6.h> struct nft_inner_tun_ctx_locked { struct nft_inner_tun_ctx ctx; local_lock_t bh_lock; }; static DEFINE_PER_CPU(struct nft_inner_tun_ctx_locked, nft_pcpu_tun_ctx) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; /* Same layout as nft_expr but it embeds the private expression data area.
*/ struct __nft_expr { const struct nft_expr_ops *ops; union { struct nft_payload payload; struct nft_meta meta; } __attribute__((aligned(__alignof__(u64)))); }; enum { NFT_INNER_EXPR_PAYLOAD, NFT_INNER_EXPR_META, }; struct nft_inner { u8 flags; u8 hdrsize; u8 type; u8 expr_type; struct __nft_expr expr; }; static int nft_inner_parse_l2l3(const struct nft_inner *priv, const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *ctx, u32 off) { __be16 llproto, outer_llproto; u32 nhoff, thoff; if (priv->flags & NFT_INNER_LL) { struct vlan_ethhdr *veth, _veth; struct ethhdr *eth, _eth; u32 hdrsize; eth = skb_header_pointer(pkt->skb, off, sizeof(_eth), &_eth); if (!eth) return -1; switch (eth->h_proto) { case htons(ETH_P_IP): case htons(ETH_P_IPV6): llproto = eth->h_proto; hdrsize = sizeof(_eth); break; case htons(ETH_P_8021Q): veth = skb_header_pointer(pkt->skb, off, sizeof(_veth), &_veth); if (!veth) return -1; outer_llproto = veth->h_vlan_encapsulated_proto; llproto = veth->h_vlan_proto; hdrsize = sizeof(_veth); break; default: return -1; } ctx->inner_lloff = off; ctx->flags |= NFT_PAYLOAD_CTX_INNER_LL; off += hdrsize; } else { struct iphdr *iph; u32 _version; iph = skb_header_pointer(pkt->skb, off, sizeof(_version), &_version); if (!iph) return -1; switch (iph->version) { case 4: llproto = htons(ETH_P_IP); break; case 6: llproto = htons(ETH_P_IPV6); break; default: return -1; } } ctx->llproto = llproto; if (llproto == htons(ETH_P_8021Q)) llproto = outer_llproto; nhoff = off; switch (llproto) { case htons(ETH_P_IP): { struct iphdr *iph, _iph; iph = skb_header_pointer(pkt->skb, nhoff, sizeof(_iph), &_iph); if (!iph) return -1; if (iph->ihl < 5 || iph->version != 4) return -1; ctx->inner_nhoff = nhoff; ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH; thoff = nhoff + (iph->ihl * 4); if ((ntohs(iph->frag_off) & IP_OFFSET) == 0) { ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; ctx->inner_thoff = thoff; ctx->l4proto = iph->protocol; } } break; case htons(ETH_P_IPV6): { struct ipv6hdr *ip6h, _ip6h; int fh_flags = IP6_FH_F_AUTH; unsigned short fragoff; int l4proto; ip6h = skb_header_pointer(pkt->skb, nhoff, sizeof(_ip6h), &_ip6h); if (!ip6h) return -1; if (ip6h->version != 6) return -1; ctx->inner_nhoff = nhoff; ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH; thoff = nhoff; l4proto = ipv6_find_hdr(pkt->skb, &thoff, -1, &fragoff, &fh_flags); if (l4proto < 0 || thoff > U16_MAX) return -1; if (fragoff == 0) { thoff = nhoff + sizeof(_ip6h); ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; ctx->inner_thoff = thoff; ctx->l4proto = l4proto; } } break; default: return -1; } return 0; } static int nft_inner_parse_tunhdr(const struct nft_inner *priv, const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *ctx, u32 *off) { if (pkt->tprot == IPPROTO_GRE) { ctx->inner_tunoff = pkt->thoff; ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN; return 0; } if (pkt->tprot != IPPROTO_UDP) return -1; ctx->inner_tunoff = *off; ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN; *off += priv->hdrsize; switch (priv->type) { case NFT_INNER_GENEVE: { struct genevehdr *gnvh, _gnvh; gnvh = skb_header_pointer(pkt->skb, pkt->inneroff, sizeof(_gnvh), &_gnvh); if (!gnvh) return -1; *off += gnvh->opt_len * 4; } break; default: break; } return 0; } static int nft_inner_parse(const struct nft_inner *priv, struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *tun_ctx) { u32 off = pkt->inneroff; if (priv->flags & NFT_INNER_HDRSIZE && nft_inner_parse_tunhdr(priv, pkt, tun_ctx, &off) < 0) return -1; if (priv->flags & (NFT_INNER_LL | NFT_INNER_NH)) { if (nft_inner_parse_l2l3(priv, pkt, tun_ctx, 
off) < 0) return -1; } else if (priv->flags & NFT_INNER_TH) { tun_ctx->inner_thoff = off; tun_ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; } tun_ctx->type = priv->type; tun_ctx->cookie = (unsigned long)pkt->skb; pkt->flags |= NFT_PKTINFO_INNER_FULL; return 0; } static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *tun_ctx) { struct nft_inner_tun_ctx *this_cpu_tun_ctx; local_bh_disable(); local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx); if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) { local_bh_enable(); local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); return false; } *tun_ctx = *this_cpu_tun_ctx; local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); local_bh_enable(); return true; } static void nft_inner_save_tun_ctx(const struct nft_pktinfo *pkt, const struct nft_inner_tun_ctx *tun_ctx) { struct nft_inner_tun_ctx *this_cpu_tun_ctx; local_bh_disable(); local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx); if (this_cpu_tun_ctx->cookie != tun_ctx->cookie) *this_cpu_tun_ctx = *tun_ctx; local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); local_bh_enable(); } static bool nft_inner_parse_needed(const struct nft_inner *priv, const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *tun_ctx) { if (!(pkt->flags & NFT_PKTINFO_INNER_FULL)) return true; if (!nft_inner_restore_tun_ctx(pkt, tun_ctx)) return true; if (priv->type != tun_ctx->type) return true; return false; } static void nft_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_inner *priv = nft_expr_priv(expr); struct nft_inner_tun_ctx tun_ctx = {}; if (nft_payload_inner_offset(pkt) < 0) goto err; if (nft_inner_parse_needed(priv, pkt, &tun_ctx) && nft_inner_parse(priv, (struct nft_pktinfo *)pkt, &tun_ctx) < 0) goto err; switch (priv->expr_type) { case NFT_INNER_EXPR_PAYLOAD: nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx); break; case NFT_INNER_EXPR_META: nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx); break; default: WARN_ON_ONCE(1); goto err; } nft_inner_save_tun_ctx(pkt, &tun_ctx); return; err: regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_inner_policy[NFTA_INNER_MAX + 1] = { [NFTA_INNER_NUM] = { .type = NLA_U32 }, [NFTA_INNER_FLAGS] = { .type = NLA_U32 }, [NFTA_INNER_HDRSIZE] = { .type = NLA_U32 }, [NFTA_INNER_TYPE] = { .type = NLA_U32 }, [NFTA_INNER_EXPR] = { .type = NLA_NESTED }, }; struct nft_expr_info { const struct nft_expr_ops *ops; const struct nlattr *attr; struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; }; static int nft_inner_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_inner *priv = nft_expr_priv(expr); u32 flags, hdrsize, type, num; struct nft_expr_info expr_info; int err; if (!tb[NFTA_INNER_FLAGS] || !tb[NFTA_INNER_NUM] || !tb[NFTA_INNER_HDRSIZE] || !tb[NFTA_INNER_TYPE] || !tb[NFTA_INNER_EXPR]) return -EINVAL; flags = ntohl(nla_get_be32(tb[NFTA_INNER_FLAGS])); if (flags & ~NFT_INNER_MASK) return -EOPNOTSUPP; num = ntohl(nla_get_be32(tb[NFTA_INNER_NUM])); if (num != 0) return -EOPNOTSUPP; hdrsize = ntohl(nla_get_be32(tb[NFTA_INNER_HDRSIZE])); type = ntohl(nla_get_be32(tb[NFTA_INNER_TYPE])); if (type > U8_MAX) return -EINVAL; if (flags & NFT_INNER_HDRSIZE) { if (hdrsize == 0 || hdrsize > 64) return -EOPNOTSUPP; } priv->flags = flags; priv->hdrsize = hdrsize; priv->type = type; err = 
nft_expr_inner_parse(ctx, tb[NFTA_INNER_EXPR], &expr_info); if (err < 0) return err; priv->expr.ops = expr_info.ops; if (!strcmp(expr_info.ops->type->name, "payload")) priv->expr_type = NFT_INNER_EXPR_PAYLOAD; else if (!strcmp(expr_info.ops->type->name, "meta")) priv->expr_type = NFT_INNER_EXPR_META; else return -EINVAL; err = expr_info.ops->init(ctx, (struct nft_expr *)&priv->expr, (const struct nlattr * const*)expr_info.tb); if (err < 0) return err; return 0; } static int nft_inner_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_inner *priv = nft_expr_priv(expr); if (nla_put_be32(skb, NFTA_INNER_NUM, htonl(0)) || nla_put_be32(skb, NFTA_INNER_TYPE, htonl(priv->type)) || nla_put_be32(skb, NFTA_INNER_FLAGS, htonl(priv->flags)) || nla_put_be32(skb, NFTA_INNER_HDRSIZE, htonl(priv->hdrsize))) goto nla_put_failure; if (nft_expr_dump(skb, NFTA_INNER_EXPR, (struct nft_expr *)&priv->expr, reset) < 0) goto nla_put_failure; return 0; nla_put_failure: return -1; } static const struct nft_expr_ops nft_inner_ops = { .type = &nft_inner_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_inner)), .eval = nft_inner_eval, .init = nft_inner_init, .dump = nft_inner_dump, }; struct nft_expr_type nft_inner_type __read_mostly = { .name = "inner", .ops = &nft_inner_ops, .policy = nft_inner_policy, .maxattr = NFTA_INNER_MAX, .owner = THIS_MODULE, }; |
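The expression above exists to record offsets into the tunnelled packet (inner_tunoff, inner_lloff, inner_nhoff, inner_thoff) so that payload and meta expressions can later be evaluated against the inner headers. As a rough illustration of that bookkeeping, the standalone sketch below walks the same layering for a UDP-encapsulated tunnel with an 8-byte header (VXLAN-like) carrying Ethernet + IPv4 + TCP; the fixed header sizes and the offset base are illustrative assumptions, not values taken from the code above.

#include <stdio.h>

int main(void)
{
	/* Assumed fixed header sizes for the example packet layout. */
	unsigned int outer_ip  = 20;	/* outer IPv4, no options */
	unsigned int outer_udp = 8;	/* outer UDP header */
	unsigned int tun_hdr   = 8;	/* NFT_INNER_HDRSIZE of a VXLAN-like tunnel */
	unsigned int inner_eth = 14;	/* inner Ethernet (NFT_INNER_LL present) */
	unsigned int inner_ip  = 20;	/* inner IPv4, ihl == 5 */

	/* Offsets counted from the outer network header, in the spirit of the
	 * inner_* fields that nft_inner_parse() fills in; the exact base used
	 * by the kernel depends on hook and family. */
	unsigned int inneroff     = outer_ip + outer_udp;	/* after outer transport */
	unsigned int inner_tunoff = inneroff;			/* tunnel header */
	unsigned int inner_lloff  = inneroff + tun_hdr;		/* inner link layer */
	unsigned int inner_nhoff  = inner_lloff + inner_eth;	/* inner network header */
	unsigned int inner_thoff  = inner_nhoff + inner_ip;	/* inner transport header */

	printf("tunoff=%u lloff=%u nhoff=%u thoff=%u\n",
	       inner_tunoff, inner_lloff, inner_nhoff, inner_thoff);
	return 0;
}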
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NETFILTER_H #define __LINUX_NETFILTER_H #include <linux/init.h> #include <linux/skbuff.h> #include <linux/net.h> #include <linux/if.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/wait.h> #include <linux/list.h> #include <linux/static_key.h> #include <linux/module.h> #include <linux/netfilter_defs.h> #include <linux/netdevice.h> #include <linux/sockptr.h> #include <net/net_namespace.h> static inline int NF_DROP_GETERR(int verdict) { return -(verdict >> NF_VERDICT_QBITS); } static __always_inline int NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err) { BUILD_BUG_ON(err > 0xffff); kfree_skb_reason(skb, reason); return ((err << 16) | NF_STOLEN); } static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *a2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ul1 = (const unsigned long *)a1; const unsigned long *ul2 = (const unsigned long *)a2; return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL; #else return a1->all[0] == a2->all[0] && a1->all[1] == a2->all[1] && a1->all[2] == a2->all[2] && a1->all[3] == a2->all[3]; #endif } static inline void nf_inet_addr_mask(const union nf_inet_addr *a1, union nf_inet_addr *result, const union nf_inet_addr *mask) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ua = (const unsigned long *)a1; unsigned long *ur = (unsigned
long *)result; const unsigned long *um = (const unsigned long *)mask; ur[0] = ua[0] & um[0]; ur[1] = ua[1] & um[1]; #else result->all[0] = a1->all[0] & mask->all[0]; result->all[1] = a1->all[1] & mask->all[1]; result->all[2] = a1->all[2] & mask->all[2]; result->all[3] = a1->all[3] & mask->all[3]; #endif } int netfilter_init(void); struct sk_buff; struct nf_hook_ops; struct sock; struct nf_hook_state { u8 hook; u8 pf; struct net_device *in; struct net_device *out; struct sock *sk; struct net *net; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; typedef unsigned int nf_hookfn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); enum nf_hook_ops_type { NF_HOOK_OP_UNDEFINED, NF_HOOK_OP_NF_TABLES, NF_HOOK_OP_BPF, }; struct nf_hook_ops { struct list_head list; struct rcu_head rcu; /* User fills in from here down. */ nf_hookfn *hook; struct net_device *dev; void *priv; u8 pf; enum nf_hook_ops_type hook_ops_type:8; unsigned int hooknum; /* Hooks are ordered in ascending priority. */ int priority; }; struct nf_hook_entry { nf_hookfn *hook; void *priv; }; struct nf_hook_entries_rcu_head { struct rcu_head head; void *allocation; }; struct nf_hook_entries { u16 num_hook_entries; /* padding */ struct nf_hook_entry hooks[]; /* trailer: pointers to original orig_ops of each hook, * followed by rcu_head and scratch space used for freeing * the structure via call_rcu. * * This is not part of struct nf_hook_entry since its only * needed in slow path (hook register/unregister): * const struct nf_hook_ops *orig_ops[] * * For the same reason, we store this at end -- its * only needed when a hook is deleted, not during * packet path processing: * struct nf_hook_entries_rcu_head head */ }; #ifdef CONFIG_NETFILTER static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e) { unsigned int n = e->num_hook_entries; const void *hook_end; hook_end = &e->hooks[n]; /* this is *past* ->hooks[]! */ return (struct nf_hook_ops **)hook_end; } static inline int nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, struct nf_hook_state *state) { return entry->hook(entry->priv, skb, state); } static inline void nf_hook_state_init(struct nf_hook_state *p, unsigned int hook, u_int8_t pf, struct net_device *indev, struct net_device *outdev, struct sock *sk, struct net *net, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { p->hook = hook; p->pf = pf; p->in = indev; p->out = outdev; p->sk = sk; p->net = net; p->okfn = okfn; } struct nf_sockopt_ops { struct list_head list; u_int8_t pf; /* Non-inclusive ranges: use 0/0/NULL to never get called. */ int set_optmin; int set_optmax; int (*set)(struct sock *sk, int optval, sockptr_t arg, unsigned int len); int get_optmin; int get_optmax; int (*get)(struct sock *sk, int optval, void __user *user, int *len); /* Use the module struct to lock set/get code in place */ struct module *owner; }; /* Function to register/unregister hook points. */ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *ops); void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *ops); int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int n); void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int n); /* Functions to register get/setsockopt ranges (non-inclusive). You need to check permissions yourself! 
*/ int nf_register_sockopt(struct nf_sockopt_ops *reg); void nf_unregister_sockopt(struct nf_sockopt_ops *reg); #ifdef CONFIG_JUMP_LABEL extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #endif int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, const struct nf_hook_entries *e, unsigned int i); void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state, const struct nf_hook_entries *e); /** * nf_hook - call a netfilter hook * * Returns 1 if the hook has allowed the packet to pass. The function * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. */ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct nf_hook_entries *hook_head = NULL; int ret = 1; #ifdef CONFIG_JUMP_LABEL if (__builtin_constant_p(pf) && __builtin_constant_p(hook) && !static_key_false(&nf_hooks_needed[pf][hook])) return 1; #endif rcu_read_lock(); switch (pf) { case NFPROTO_IPV4: hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]); break; case NFPROTO_IPV6: hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]); break; case NFPROTO_ARP: #ifdef CONFIG_NETFILTER_FAMILY_ARP if (WARN_ON_ONCE(hook >= ARRAY_SIZE(net->nf.hooks_arp))) break; hook_head = rcu_dereference(net->nf.hooks_arp[hook]); #endif break; case NFPROTO_BRIDGE: #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE hook_head = rcu_dereference(net->nf.hooks_bridge[hook]); #endif break; default: WARN_ON_ONCE(1); break; } if (hook_head) { struct nf_hook_state state; nf_hook_state_init(&state, hook, pf, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state, hook_head, 0); } rcu_read_unlock(); return ret; } /* Activate hook; either okfn or kfree_skb called, unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with the consequences). Returns -ERRNO if packet dropped. Zero means queued, stolen or accepted. */ /* RR: > I don't want nf_hook to return anything because people might forget > about async and trust the return value to mean "packet was ok". 
AK: Just document it clearly, then you can expect some sense from kernel coders :) */ static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *), bool cond) { int ret; if (!cond || ((ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn)) == 1)) ret = okfn(net, sk, skb); return ret; } static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn); if (ret == 1) ret = okfn(net, sk, skb); return ret; } static inline void NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct list_head *head, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct nf_hook_entries *hook_head = NULL; #ifdef CONFIG_JUMP_LABEL if (__builtin_constant_p(pf) && __builtin_constant_p(hook) && !static_key_false(&nf_hooks_needed[pf][hook])) return; #endif rcu_read_lock(); switch (pf) { case NFPROTO_IPV4: hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]); break; case NFPROTO_IPV6: hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]); break; default: WARN_ON_ONCE(1); break; } if (hook_head) { struct nf_hook_state state; nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn); nf_hook_slow_list(head, &state, hook_head); } rcu_read_unlock(); } /* Call setsockopt() */ int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, sockptr_t opt, unsigned int len); int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); struct flowi; struct nf_queue_entry; __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol, unsigned short family); __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, unsigned int len, u_int8_t protocol, unsigned short family); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family); #include <net/flow.h> struct nf_conn; enum nf_nat_manip_type; struct nlattr; struct nf_nat_hook { int (*parse_nat_setup)(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr); void (*decode_session)(struct sk_buff *skb, struct flowi *fl); void (*remove_nat_bysrc)(struct nf_conn *ct); }; extern const struct nf_nat_hook __rcu *nf_nat_hook; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { #if IS_ENABLED(CONFIG_NF_NAT) const struct nf_nat_hook *nat_hook; rcu_read_lock(); nat_hook = rcu_dereference(nf_nat_hook); if (nat_hook && nat_hook->decode_session) nat_hook->decode_session(skb, fl); rcu_read_unlock(); #endif } #else /* !CONFIG_NETFILTER */ static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *), bool cond) { return okfn(net, sk, skb); } static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return okfn(net, sk, skb); } static inline void NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, 
struct sock *sk, struct list_head *head, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { /* nothing to do */ } static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return 1; } struct flowi; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { } #endif /*CONFIG_NETFILTER*/ #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_zones_common.h> void nf_ct_attach(struct sk_buff *, const struct sk_buff *); void nf_ct_set_closing(struct nf_conntrack *nfct); struct nf_conntrack_tuple; bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb); #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} static inline void nf_ct_set_closing(struct nf_conntrack *nfct) {} struct nf_conntrack_tuple; static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) { return false; } #endif struct nf_conn; enum ip_conntrack_info; struct nf_ct_hook { int (*update)(struct net *net, struct sk_buff *skb); void (*destroy)(struct nf_conntrack *); bool (*get_tuple_skb)(struct nf_conntrack_tuple *, const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); void (*set_closing)(struct nf_conntrack *nfct); int (*confirm)(struct sk_buff *skb); u32 (*get_id)(const struct nf_conntrack *nfct); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; struct nlattr; struct nfnl_ct_hook { size_t (*build_size)(const struct nf_conn *ct); int (*build)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, u_int16_t ct_attr, u_int16_t ct_info_attr); int (*parse)(const struct nlattr *attr, struct nf_conn *ct); int (*attach_expect)(const struct nlattr *attr, struct nf_conn *ct, u32 portid, u32 report); void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, s32 off); }; extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook; struct nf_defrag_hook { struct module *owner; int (*enable)(struct net *net); void (*disable)(struct net *net); }; extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook; extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook; /* * Contains bitmask of ctnetlink event subscribers, if any. * Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag. */ extern u8 nf_ctnetlink_has_listener; #endif /*__LINUX_NETFILTER_H*/ |
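The registration API declared in this header (struct nf_hook_ops together with nf_register_net_hook()/nf_unregister_net_hook()) is what both in-tree and out-of-tree hook users build on. A minimal, hypothetical module that attaches one IPv4 hook at PRE_ROUTING and accepts every packet could look like the sketch below; all names are placeholders.

#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <net/net_namespace.h>

static unsigned int example_hookfn(void *priv, struct sk_buff *skb,
				   const struct nf_hook_state *state)
{
	return NF_ACCEPT;	/* let the packet continue through the stack */
}

static const struct nf_hook_ops example_ops = {
	.hook     = example_hookfn,
	.pf       = NFPROTO_IPV4,
	.hooknum  = NF_INET_PRE_ROUTING,
	.priority = NF_IP_PRI_FIRST,
};

static int __init example_init(void)
{
	/* register in the initial network namespace only */
	return nf_register_net_hook(&init_net, &example_ops);
}

static void __exit example_exit(void)
{
	nf_unregister_net_hook(&init_net, &example_ops);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");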
| 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 | // SPDX-License-Identifier: GPL-2.0-only /* * Driver for RobotFuzz OSIF * * Copyright (c) 2013 Andrew Lunn <andrew@lunn.ch> * Copyright (c) 2007 Barry Carter <Barry.Carter@robotfuzz.com> * * Based on the i2c-tiny-usb by * * Copyright (C) 2006 Til Harbaum (Till@Harbaum.org) */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/i2c.h> #include <linux/slab.h> #include <linux/usb.h> #define OSIFI2C_READ 20 #define OSIFI2C_WRITE 21 #define OSIFI2C_STOP 22 #define OSIFI2C_STATUS 23 #define OSIFI2C_SET_BIT_RATE 24 #define STATUS_ADDRESS_ACK 0 #define STATUS_ADDRESS_NAK 2 struct osif_priv { struct usb_device *usb_dev; struct usb_interface *interface; struct i2c_adapter adapter; unsigned char status; }; static int osif_usb_read(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct osif_priv *priv = adapter->algo_data; return usb_control_msg(priv->usb_dev, usb_rcvctrlpipe(priv->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_IN, value, index, data, len, 2000); } static int osif_usb_write(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct osif_priv *priv = adapter->algo_data; return usb_control_msg(priv->usb_dev, usb_sndctrlpipe(priv->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE, value, index, data, len, 2000); } static int osif_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) { struct osif_priv *priv = adapter->algo_data; struct i2c_msg *pmsg; int ret; int i; for (i = 0; i < num; i++) { pmsg = &msgs[i]; if (pmsg->flags & I2C_M_RD) { ret = osif_usb_read(adapter, OSIFI2C_READ, pmsg->flags, pmsg->addr, pmsg->buf, pmsg->len); if (ret != pmsg->len) { dev_err(&adapter->dev, "failure reading data\n"); return -EREMOTEIO; } } else { ret = osif_usb_write(adapter, OSIFI2C_WRITE, pmsg->flags, pmsg->addr, pmsg->buf, pmsg->len); if (ret != pmsg->len) { dev_err(&adapter->dev, "failure writing data\n"); return -EREMOTEIO; } } ret = osif_usb_write(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); if (ret) { dev_err(&adapter->dev, "failure sending STOP\n"); return -EREMOTEIO; } /* read status */ ret = osif_usb_read(adapter, OSIFI2C_STATUS, 0, 0, &priv->status, 1); if (ret != 1) { dev_err(&adapter->dev, "failure reading status\n"); return -EREMOTEIO; } if (priv->status != STATUS_ADDRESS_ACK) { dev_dbg(&adapter->dev, "status = %d\n", priv->status); return -EREMOTEIO; } } return i; } static u32 osif_func(struct i2c_adapter *adapter) { return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } static const struct i2c_algorithm osif_algorithm = { .xfer = osif_xfer, .functionality = osif_func, }; #define USB_OSIF_VENDOR_ID 0x1964 #define USB_OSIF_PRODUCT_ID 0x0001 static const struct usb_device_id osif_table[] = { { USB_DEVICE(USB_OSIF_VENDOR_ID, USB_OSIF_PRODUCT_ID) }, { } }; MODULE_DEVICE_TABLE(usb, 
osif_table); static int osif_probe(struct usb_interface *interface, const struct usb_device_id *id) { int ret; struct osif_priv *priv; u16 version; priv = devm_kzalloc(&interface->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; priv->usb_dev = usb_get_dev(interface_to_usbdev(interface)); priv->interface = interface; usb_set_intfdata(interface, priv); priv->adapter.owner = THIS_MODULE; priv->adapter.class = I2C_CLASS_HWMON; priv->adapter.algo = &osif_algorithm; priv->adapter.algo_data = priv; snprintf(priv->adapter.name, sizeof(priv->adapter.name), "OSIF at bus %03d device %03d", priv->usb_dev->bus->busnum, priv->usb_dev->devnum); /* * Set bus frequency. The frequency is: * 120,000,000 / ( 16 + 2 * div * 4^prescale). * Using dev = 52, prescale = 0 give 100KHz */ ret = osif_usb_write(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0, NULL, 0); if (ret) { dev_err(&interface->dev, "failure sending bit rate"); usb_put_dev(priv->usb_dev); return ret; } i2c_add_adapter(&(priv->adapter)); version = le16_to_cpu(priv->usb_dev->descriptor.bcdDevice); dev_info(&interface->dev, "version %x.%02x found at bus %03d address %03d", version >> 8, version & 0xff, priv->usb_dev->bus->busnum, priv->usb_dev->devnum); return 0; } static void osif_disconnect(struct usb_interface *interface) { struct osif_priv *priv = usb_get_intfdata(interface); i2c_del_adapter(&(priv->adapter)); usb_set_intfdata(interface, NULL); usb_put_dev(priv->usb_dev); } static struct usb_driver osif_driver = { .name = "RobotFuzz Open Source InterFace, OSIF", .probe = osif_probe, .disconnect = osif_disconnect, .id_table = osif_table, }; module_usb_driver(osif_driver); MODULE_AUTHOR("Andrew Lunn <andrew@lunn.ch>"); MODULE_AUTHOR("Barry Carter <barry.carter@robotfuzz.com>"); MODULE_DESCRIPTION("RobotFuzz OSIF driver"); MODULE_LICENSE("GPL v2"); |
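Because the driver registers a plain struct i2c_adapter, no OSIF-specific interface is exposed: anything that speaks the standard Linux I2C API can use the bus. For instance, a userspace program could reach a device behind the adapter through i2c-dev along the lines of the sketch below; the bus number and the 0x50 slave address are placeholders, assuming the adapter came up as /dev/i2c-1.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/i2c.h>
#include <linux/i2c-dev.h>

int main(void)
{
	struct i2c_msg msgs[2];
	struct i2c_rdwr_ioctl_data xfer;
	unsigned char reg = 0x00, val = 0;
	int fd = open("/dev/i2c-1", O_RDWR);	/* adapter registered by osif_probe() */

	if (fd < 0)
		return 1;

	msgs[0].addr  = 0x50;		/* write the register address ... */
	msgs[0].flags = 0;
	msgs[0].len   = 1;
	msgs[0].buf   = &reg;

	msgs[1].addr  = 0x50;		/* ... then read one byte back */
	msgs[1].flags = I2C_M_RD;
	msgs[1].len   = 1;
	msgs[1].buf   = &val;

	memset(&xfer, 0, sizeof(xfer));
	xfer.msgs  = msgs;
	xfer.nmsgs = 2;

	if (ioctl(fd, I2C_RDWR, &xfer) < 0) {
		close(fd);
		return 1;
	}
	printf("reg 0x%02x = 0x%02x\n", reg, val);
	close(fd);
	return 0;
}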
845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010 IBM Corporation * Copyright (C) 2010 Politecnico di Torino, Italy * TORSEC group -- https://security.polito.it * * Authors: * Mimi Zohar <zohar@us.ibm.com> * Roberto Sassu <roberto.sassu@polito.it> * * See Documentation/security/keys/trusted-encrypted.rst */ #include <linux/uaccess.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/parser.h> #include <linux/string.h> #include <linux/err.h> #include <keys/user-type.h> #include <keys/trusted-type.h> #include <keys/encrypted-type.h> #include <linux/key-type.h> #include <linux/random.h> #include <linux/rcupdate.h> #include <linux/scatterlist.h> #include <linux/ctype.h> #include <crypto/aes.h> #include <crypto/hash.h> #include <crypto/sha2.h> #include <crypto/skcipher.h> #include <crypto/utils.h> #include "encrypted.h" #include "ecryptfs_format.h" static const char KEY_TRUSTED_PREFIX[] = "trusted:"; static const char KEY_USER_PREFIX[] = "user:"; static const char hash_alg[] = "sha256"; static const char hmac_alg[] = "hmac(sha256)"; static const char blkcipher_alg[] = "cbc(aes)"; static const char key_format_default[] = "default"; static const char key_format_ecryptfs[] = "ecryptfs"; static const char key_format_enc32[] = "enc32"; static unsigned int ivsize; static int blksize; #define KEY_TRUSTED_PREFIX_LEN (sizeof (KEY_TRUSTED_PREFIX) - 1) #define KEY_USER_PREFIX_LEN (sizeof (KEY_USER_PREFIX) - 1) #define KEY_ECRYPTFS_DESC_LEN 16 #define HASH_SIZE SHA256_DIGEST_SIZE #define MAX_DATA_SIZE 4096 #define MIN_DATA_SIZE 20 #define KEY_ENC32_PAYLOAD_LEN 32 static struct crypto_shash *hash_tfm; enum { Opt_new, Opt_load, Opt_update, Opt_err }; enum { Opt_default, Opt_ecryptfs, Opt_enc32, Opt_error }; static const match_table_t key_format_tokens = { {Opt_default, "default"}, {Opt_ecryptfs, "ecryptfs"}, {Opt_enc32, "enc32"}, {Opt_error, NULL} }; static const match_table_t key_tokens = { {Opt_new, "new"}, {Opt_load, "load"}, {Opt_update, "update"}, {Opt_err, NULL} }; static bool user_decrypted_data = IS_ENABLED(CONFIG_USER_DECRYPTED_DATA); module_param(user_decrypted_data, bool, 0); MODULE_PARM_DESC(user_decrypted_data, "Allow instantiation of encrypted keys using provided decrypted data"); static int aes_get_sizes(void) { struct crypto_skcipher *tfm; tfm = crypto_alloc_skcipher(blkcipher_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { pr_err("encrypted_key: failed to alloc_cipher (%ld)\n", PTR_ERR(tfm)); return PTR_ERR(tfm); } ivsize = crypto_skcipher_ivsize(tfm); blksize = crypto_skcipher_blocksize(tfm); crypto_free_skcipher(tfm); return 0; } /* * valid_ecryptfs_desc - verify the description of a new/loaded encrypted 
key * * The description of a encrypted key with format 'ecryptfs' must contain * exactly 16 hexadecimal characters. * */ static int valid_ecryptfs_desc(const char *ecryptfs_desc) { int i; if (strlen(ecryptfs_desc) != KEY_ECRYPTFS_DESC_LEN) { pr_err("encrypted_key: key description must be %d hexadecimal " "characters long\n", KEY_ECRYPTFS_DESC_LEN); return -EINVAL; } for (i = 0; i < KEY_ECRYPTFS_DESC_LEN; i++) { if (!isxdigit(ecryptfs_desc[i])) { pr_err("encrypted_key: key description must contain " "only hexadecimal characters\n"); return -EINVAL; } } return 0; } /* * valid_master_desc - verify the 'key-type:desc' of a new/updated master-key * * key-type:= "trusted:" | "user:" * desc:= master-key description * * Verify that 'key-type' is valid and that 'desc' exists. On key update, * only the master key description is permitted to change, not the key-type. * The key-type remains constant. * * On success returns 0, otherwise -EINVAL. */ static int valid_master_desc(const char *new_desc, const char *orig_desc) { int prefix_len; if (!strncmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) prefix_len = KEY_TRUSTED_PREFIX_LEN; else if (!strncmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) prefix_len = KEY_USER_PREFIX_LEN; else return -EINVAL; if (!new_desc[prefix_len]) return -EINVAL; if (orig_desc && strncmp(new_desc, orig_desc, prefix_len)) return -EINVAL; return 0; } /* * datablob_parse - parse the keyctl data * * datablob format: * new [<format>] <master-key name> <decrypted data length> [<decrypted data>] * load [<format>] <master-key name> <decrypted data length> * <encrypted iv + data> * update <new-master-key name> * * Tokenizes a copy of the keyctl data, returning a pointer to each token, * which is null terminated. * * On success returns 0, otherwise -EINVAL. 
*/ static int datablob_parse(char *datablob, const char **format, char **master_desc, char **decrypted_datalen, char **hex_encoded_iv, char **decrypted_data) { substring_t args[MAX_OPT_ARGS]; int ret = -EINVAL; int key_cmd; int key_format; char *p, *keyword; keyword = strsep(&datablob, " \t"); if (!keyword) { pr_info("encrypted_key: insufficient parameters specified\n"); return ret; } key_cmd = match_token(keyword, key_tokens, args); /* Get optional format: default | ecryptfs */ p = strsep(&datablob, " \t"); if (!p) { pr_err("encrypted_key: insufficient parameters specified\n"); return ret; } key_format = match_token(p, key_format_tokens, args); switch (key_format) { case Opt_ecryptfs: case Opt_enc32: case Opt_default: *format = p; *master_desc = strsep(&datablob, " \t"); break; case Opt_error: *master_desc = p; break; } if (!*master_desc) { pr_info("encrypted_key: master key parameter is missing\n"); goto out; } if (valid_master_desc(*master_desc, NULL) < 0) { pr_info("encrypted_key: master key parameter \'%s\' " "is invalid\n", *master_desc); goto out; } if (decrypted_datalen) { *decrypted_datalen = strsep(&datablob, " \t"); if (!*decrypted_datalen) { pr_info("encrypted_key: keylen parameter is missing\n"); goto out; } } switch (key_cmd) { case Opt_new: if (!decrypted_datalen) { pr_info("encrypted_key: keyword \'%s\' not allowed " "when called from .update method\n", keyword); break; } *decrypted_data = strsep(&datablob, " \t"); ret = 0; break; case Opt_load: if (!decrypted_datalen) { pr_info("encrypted_key: keyword \'%s\' not allowed " "when called from .update method\n", keyword); break; } *hex_encoded_iv = strsep(&datablob, " \t"); if (!*hex_encoded_iv) { pr_info("encrypted_key: hex blob is missing\n"); break; } ret = 0; break; case Opt_update: if (decrypted_datalen) { pr_info("encrypted_key: keyword \'%s\' not allowed " "when called from .instantiate method\n", keyword); break; } ret = 0; break; case Opt_err: pr_info("encrypted_key: keyword \'%s\' not recognized\n", keyword); break; } out: return ret; } /* * datablob_format - format as an ascii string, before copying to userspace */ static char *datablob_format(struct encrypted_key_payload *epayload, size_t asciiblob_len) { char *ascii_buf, *bufp; u8 *iv = epayload->iv; int len; int i; ascii_buf = kmalloc(asciiblob_len + 1, GFP_KERNEL); if (!ascii_buf) goto out; ascii_buf[asciiblob_len] = '\0'; /* copy datablob master_desc and datalen strings */ len = sprintf(ascii_buf, "%s %s %s ", epayload->format, epayload->master_desc, epayload->datalen); /* convert the hex encoded iv, encrypted-data and HMAC to ascii */ bufp = &ascii_buf[len]; for (i = 0; i < (asciiblob_len - len) / 2; i++) bufp = hex_byte_pack(bufp, iv[i]); out: return ascii_buf; } /* * request_user_key - request the user key * * Use a user provided key to encrypt/decrypt an encrypted-key. 
*/ static struct key *request_user_key(const char *master_desc, const u8 **master_key, size_t *master_keylen) { const struct user_key_payload *upayload; struct key *ukey; ukey = request_key(&key_type_user, master_desc, NULL); if (IS_ERR(ukey)) goto error; down_read(&ukey->sem); upayload = user_key_payload_locked(ukey); if (!upayload) { /* key was revoked before we acquired its semaphore */ up_read(&ukey->sem); key_put(ukey); ukey = ERR_PTR(-EKEYREVOKED); goto error; } *master_key = upayload->data; *master_keylen = upayload->datalen; error: return ukey; } static int calc_hmac(u8 *digest, const u8 *key, unsigned int keylen, const u8 *buf, unsigned int buflen) { struct crypto_shash *tfm; int err; tfm = crypto_alloc_shash(hmac_alg, 0, 0); if (IS_ERR(tfm)) { pr_err("encrypted_key: can't alloc %s transform: %ld\n", hmac_alg, PTR_ERR(tfm)); return PTR_ERR(tfm); } err = crypto_shash_setkey(tfm, key, keylen); if (!err) err = crypto_shash_tfm_digest(tfm, buf, buflen, digest); crypto_free_shash(tfm); return err; } enum derived_key_type { ENC_KEY, AUTH_KEY }; /* Derive authentication/encryption key from trusted key */ static int get_derived_key(u8 *derived_key, enum derived_key_type key_type, const u8 *master_key, size_t master_keylen) { u8 *derived_buf; unsigned int derived_buf_len; int ret; derived_buf_len = strlen("AUTH_KEY") + 1 + master_keylen; if (derived_buf_len < HASH_SIZE) derived_buf_len = HASH_SIZE; derived_buf = kzalloc(derived_buf_len, GFP_KERNEL); if (!derived_buf) return -ENOMEM; if (key_type) strcpy(derived_buf, "AUTH_KEY"); else strcpy(derived_buf, "ENC_KEY"); memcpy(derived_buf + strlen(derived_buf) + 1, master_key, master_keylen); ret = crypto_shash_tfm_digest(hash_tfm, derived_buf, derived_buf_len, derived_key); kfree_sensitive(derived_buf); return ret; } static struct skcipher_request *init_skcipher_req(const u8 *key, unsigned int key_len) { struct skcipher_request *req; struct crypto_skcipher *tfm; int ret; tfm = crypto_alloc_skcipher(blkcipher_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { pr_err("encrypted_key: failed to load %s transform (%ld)\n", blkcipher_alg, PTR_ERR(tfm)); return ERR_CAST(tfm); } ret = crypto_skcipher_setkey(tfm, key, key_len); if (ret < 0) { pr_err("encrypted_key: failed to setkey (%d)\n", ret); crypto_free_skcipher(tfm); return ERR_PTR(ret); } req = skcipher_request_alloc(tfm, GFP_KERNEL); if (!req) { pr_err("encrypted_key: failed to allocate request for %s\n", blkcipher_alg); crypto_free_skcipher(tfm); return ERR_PTR(-ENOMEM); } skcipher_request_set_callback(req, 0, NULL, NULL); return req; } static struct key *request_master_key(struct encrypted_key_payload *epayload, const u8 **master_key, size_t *master_keylen) { struct key *mkey = ERR_PTR(-EINVAL); if (!strncmp(epayload->master_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) { mkey = request_trusted_key(epayload->master_desc + KEY_TRUSTED_PREFIX_LEN, master_key, master_keylen); } else if (!strncmp(epayload->master_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) { mkey = request_user_key(epayload->master_desc + KEY_USER_PREFIX_LEN, master_key, master_keylen); } else goto out; if (IS_ERR(mkey)) { int ret = PTR_ERR(mkey); if (ret == -ENOTSUPP) pr_info("encrypted_key: key %s not supported", epayload->master_desc); else pr_info("encrypted_key: key %s not found", epayload->master_desc); goto out; } dump_master_key(*master_key, *master_keylen); out: return mkey; } /* Before returning data to userspace, encrypt decrypted data. 
*/ static int derived_key_encrypt(struct encrypted_key_payload *epayload, const u8 *derived_key, unsigned int derived_keylen) { struct scatterlist sg_in[2]; struct scatterlist sg_out[1]; struct crypto_skcipher *tfm; struct skcipher_request *req; unsigned int encrypted_datalen; u8 iv[AES_BLOCK_SIZE]; int ret; encrypted_datalen = roundup(epayload->decrypted_datalen, blksize); req = init_skcipher_req(derived_key, derived_keylen); ret = PTR_ERR(req); if (IS_ERR(req)) goto out; dump_decrypted_data(epayload); sg_init_table(sg_in, 2); sg_set_buf(&sg_in[0], epayload->decrypted_data, epayload->decrypted_datalen); sg_set_page(&sg_in[1], ZERO_PAGE(0), AES_BLOCK_SIZE, 0); sg_init_table(sg_out, 1); sg_set_buf(sg_out, epayload->encrypted_data, encrypted_datalen); memcpy(iv, epayload->iv, sizeof(iv)); skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv); ret = crypto_skcipher_encrypt(req); tfm = crypto_skcipher_reqtfm(req); skcipher_request_free(req); crypto_free_skcipher(tfm); if (ret < 0) pr_err("encrypted_key: failed to encrypt (%d)\n", ret); else dump_encrypted_data(epayload, encrypted_datalen); out: return ret; } static int datablob_hmac_append(struct encrypted_key_payload *epayload, const u8 *master_key, size_t master_keylen) { u8 derived_key[HASH_SIZE]; u8 *digest; int ret; ret = get_derived_key(derived_key, AUTH_KEY, master_key, master_keylen); if (ret < 0) goto out; digest = epayload->format + epayload->datablob_len; ret = calc_hmac(digest, derived_key, sizeof derived_key, epayload->format, epayload->datablob_len); if (!ret) dump_hmac(NULL, digest, HASH_SIZE); out: memzero_explicit(derived_key, sizeof(derived_key)); return ret; } /* verify HMAC before decrypting encrypted key */ static int datablob_hmac_verify(struct encrypted_key_payload *epayload, const u8 *format, const u8 *master_key, size_t master_keylen) { u8 derived_key[HASH_SIZE]; u8 digest[HASH_SIZE]; int ret; char *p; unsigned short len; ret = get_derived_key(derived_key, AUTH_KEY, master_key, master_keylen); if (ret < 0) goto out; len = epayload->datablob_len; if (!format) { p = epayload->master_desc; len -= strlen(epayload->format) + 1; } else p = epayload->format; ret = calc_hmac(digest, derived_key, sizeof derived_key, p, len); if (ret < 0) goto out; ret = crypto_memneq(digest, epayload->format + epayload->datablob_len, sizeof(digest)); if (ret) { ret = -EINVAL; dump_hmac("datablob", epayload->format + epayload->datablob_len, HASH_SIZE); dump_hmac("calc", digest, HASH_SIZE); } out: memzero_explicit(derived_key, sizeof(derived_key)); return ret; } static int derived_key_decrypt(struct encrypted_key_payload *epayload, const u8 *derived_key, unsigned int derived_keylen) { struct scatterlist sg_in[1]; struct scatterlist sg_out[2]; struct crypto_skcipher *tfm; struct skcipher_request *req; unsigned int encrypted_datalen; u8 iv[AES_BLOCK_SIZE]; u8 *pad; int ret; /* Throwaway buffer to hold the unused zero padding at the end */ pad = kmalloc(AES_BLOCK_SIZE, GFP_KERNEL); if (!pad) return -ENOMEM; encrypted_datalen = roundup(epayload->decrypted_datalen, blksize); req = init_skcipher_req(derived_key, derived_keylen); ret = PTR_ERR(req); if (IS_ERR(req)) goto out; dump_encrypted_data(epayload, encrypted_datalen); sg_init_table(sg_in, 1); sg_init_table(sg_out, 2); sg_set_buf(sg_in, epayload->encrypted_data, encrypted_datalen); sg_set_buf(&sg_out[0], epayload->decrypted_data, epayload->decrypted_datalen); sg_set_buf(&sg_out[1], pad, AES_BLOCK_SIZE); memcpy(iv, epayload->iv, sizeof(iv)); skcipher_request_set_crypt(req, sg_in, 
sg_out, encrypted_datalen, iv); ret = crypto_skcipher_decrypt(req); tfm = crypto_skcipher_reqtfm(req); skcipher_request_free(req); crypto_free_skcipher(tfm); if (ret < 0) goto out; dump_decrypted_data(epayload); out: kfree(pad); return ret; } /* Allocate memory for decrypted key and datablob. */ static struct encrypted_key_payload *encrypted_key_alloc(struct key *key, const char *format, const char *master_desc, const char *datalen, const char *decrypted_data) { struct encrypted_key_payload *epayload = NULL; unsigned short datablob_len; unsigned short decrypted_datalen; unsigned short payload_datalen; unsigned int encrypted_datalen; unsigned int format_len; long dlen; int i; int ret; ret = kstrtol(datalen, 10, &dlen); if (ret < 0 || dlen < MIN_DATA_SIZE || dlen > MAX_DATA_SIZE) return ERR_PTR(-EINVAL); format_len = (!format) ? strlen(key_format_default) : strlen(format); decrypted_datalen = dlen; payload_datalen = decrypted_datalen; if (decrypted_data) { if (!user_decrypted_data) { pr_err("encrypted key: instantiation of keys using provided decrypted data is disabled since CONFIG_USER_DECRYPTED_DATA is set to false\n"); return ERR_PTR(-EINVAL); } if (strlen(decrypted_data) != decrypted_datalen * 2) { pr_err("encrypted key: decrypted data provided does not match decrypted data length provided\n"); return ERR_PTR(-EINVAL); } for (i = 0; i < strlen(decrypted_data); i++) { if (!isxdigit(decrypted_data[i])) { pr_err("encrypted key: decrypted data provided must contain only hexadecimal characters\n"); return ERR_PTR(-EINVAL); } } } if (format) { if (!strcmp(format, key_format_ecryptfs)) { if (dlen != ECRYPTFS_MAX_KEY_BYTES) { pr_err("encrypted_key: keylen for the ecryptfs format must be equal to %d bytes\n", ECRYPTFS_MAX_KEY_BYTES); return ERR_PTR(-EINVAL); } decrypted_datalen = ECRYPTFS_MAX_KEY_BYTES; payload_datalen = sizeof(struct ecryptfs_auth_tok); } else if (!strcmp(format, key_format_enc32)) { if (decrypted_datalen != KEY_ENC32_PAYLOAD_LEN) { pr_err("encrypted_key: enc32 key payload incorrect length: %d\n", decrypted_datalen); return ERR_PTR(-EINVAL); } } } encrypted_datalen = roundup(decrypted_datalen, blksize); datablob_len = format_len + 1 + strlen(master_desc) + 1 + strlen(datalen) + 1 + ivsize + 1 + encrypted_datalen; ret = key_payload_reserve(key, payload_datalen + datablob_len + HASH_SIZE + 1); if (ret < 0) return ERR_PTR(ret); epayload = kzalloc(sizeof(*epayload) + payload_datalen + datablob_len + HASH_SIZE + 1, GFP_KERNEL); if (!epayload) return ERR_PTR(-ENOMEM); epayload->payload_datalen = payload_datalen; epayload->decrypted_datalen = decrypted_datalen; epayload->datablob_len = datablob_len; return epayload; } static int encrypted_key_decrypt(struct encrypted_key_payload *epayload, const char *format, const char *hex_encoded_iv) { struct key *mkey; u8 derived_key[HASH_SIZE]; const u8 *master_key; u8 *hmac; const char *hex_encoded_data; unsigned int encrypted_datalen; size_t master_keylen; size_t asciilen; int ret; encrypted_datalen = roundup(epayload->decrypted_datalen, blksize); asciilen = (ivsize + 1 + encrypted_datalen + HASH_SIZE) * 2; if (strlen(hex_encoded_iv) != asciilen) return -EINVAL; hex_encoded_data = hex_encoded_iv + (2 * ivsize) + 2; ret = hex2bin(epayload->iv, hex_encoded_iv, ivsize); if (ret < 0) return -EINVAL; ret = hex2bin(epayload->encrypted_data, hex_encoded_data, encrypted_datalen); if (ret < 0) return -EINVAL; hmac = epayload->format + epayload->datablob_len; ret = hex2bin(hmac, hex_encoded_data + (encrypted_datalen * 2), HASH_SIZE); if (ret < 0) return 
-EINVAL; mkey = request_master_key(epayload, &master_key, &master_keylen); if (IS_ERR(mkey)) return PTR_ERR(mkey); ret = datablob_hmac_verify(epayload, format, master_key, master_keylen); if (ret < 0) { pr_err("encrypted_key: bad hmac (%d)\n", ret); goto out; } ret = get_derived_key(derived_key, ENC_KEY, master_key, master_keylen); if (ret < 0) goto out; ret = derived_key_decrypt(epayload, derived_key, sizeof derived_key); if (ret < 0) pr_err("encrypted_key: failed to decrypt key (%d)\n", ret); out: up_read(&mkey->sem); key_put(mkey); memzero_explicit(derived_key, sizeof(derived_key)); return ret; } static void __ekey_init(struct encrypted_key_payload *epayload, const char *format, const char *master_desc, const char *datalen) { unsigned int format_len; format_len = (!format) ? strlen(key_format_default) : strlen(format); epayload->format = epayload->payload_data + epayload->payload_datalen; epayload->master_desc = epayload->format + format_len + 1; epayload->datalen = epayload->master_desc + strlen(master_desc) + 1; epayload->iv = epayload->datalen + strlen(datalen) + 1; epayload->encrypted_data = epayload->iv + ivsize + 1; epayload->decrypted_data = epayload->payload_data; if (!format) memcpy(epayload->format, key_format_default, format_len); else { if (!strcmp(format, key_format_ecryptfs)) epayload->decrypted_data = ecryptfs_get_auth_tok_key((struct ecryptfs_auth_tok *)epayload->payload_data); memcpy(epayload->format, format, format_len); } memcpy(epayload->master_desc, master_desc, strlen(master_desc)); memcpy(epayload->datalen, datalen, strlen(datalen)); } /* * encrypted_init - initialize an encrypted key * * For a new key, use either a random number or user-provided decrypted data in * case it is provided. A random number is used for the iv in both cases. For * an old key, decrypt the hex encoded data. */ static int encrypted_init(struct encrypted_key_payload *epayload, const char *key_desc, const char *format, const char *master_desc, const char *datalen, const char *hex_encoded_iv, const char *decrypted_data) { int ret = 0; if (format && !strcmp(format, key_format_ecryptfs)) { ret = valid_ecryptfs_desc(key_desc); if (ret < 0) return ret; ecryptfs_fill_auth_tok((struct ecryptfs_auth_tok *)epayload->payload_data, key_desc); } __ekey_init(epayload, format, master_desc, datalen); if (hex_encoded_iv) { ret = encrypted_key_decrypt(epayload, format, hex_encoded_iv); } else if (decrypted_data) { get_random_bytes(epayload->iv, ivsize); ret = hex2bin(epayload->decrypted_data, decrypted_data, epayload->decrypted_datalen); } else { get_random_bytes(epayload->iv, ivsize); get_random_bytes(epayload->decrypted_data, epayload->decrypted_datalen); } return ret; } /* * encrypted_instantiate - instantiate an encrypted key * * Instantiates the key: * - by decrypting an existing encrypted datablob, or * - by creating a new encrypted key based on a kernel random number, or * - using provided decrypted data. * * On success, return 0. Otherwise return errno. 
*/ static int encrypted_instantiate(struct key *key, struct key_preparsed_payload *prep) { struct encrypted_key_payload *epayload = NULL; char *datablob = NULL; const char *format = NULL; char *master_desc = NULL; char *decrypted_datalen = NULL; char *hex_encoded_iv = NULL; char *decrypted_data = NULL; size_t datalen = prep->datalen; int ret; if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; datablob = kmalloc(datalen + 1, GFP_KERNEL); if (!datablob) return -ENOMEM; datablob[datalen] = 0; memcpy(datablob, prep->data, datalen); ret = datablob_parse(datablob, &format, &master_desc, &decrypted_datalen, &hex_encoded_iv, &decrypted_data); if (ret < 0) goto out; epayload = encrypted_key_alloc(key, format, master_desc, decrypted_datalen, decrypted_data); if (IS_ERR(epayload)) { ret = PTR_ERR(epayload); goto out; } ret = encrypted_init(epayload, key->description, format, master_desc, decrypted_datalen, hex_encoded_iv, decrypted_data); if (ret < 0) { kfree_sensitive(epayload); goto out; } rcu_assign_keypointer(key, epayload); out: kfree_sensitive(datablob); return ret; } static void encrypted_rcu_free(struct rcu_head *rcu) { struct encrypted_key_payload *epayload; epayload = container_of(rcu, struct encrypted_key_payload, rcu); kfree_sensitive(epayload); } /* * encrypted_update - update the master key description * * Change the master key description for an existing encrypted key. * The next read will return an encrypted datablob using the new * master key description. * * On success, return 0. Otherwise return errno. */ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) { struct encrypted_key_payload *epayload = key->payload.data[0]; struct encrypted_key_payload *new_epayload; char *buf; char *new_master_desc = NULL; const char *format = NULL; size_t datalen = prep->datalen; int ret = 0; if (key_is_negative(key)) return -ENOKEY; if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; buf = kmalloc(datalen + 1, GFP_KERNEL); if (!buf) return -ENOMEM; buf[datalen] = 0; memcpy(buf, prep->data, datalen); ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL, NULL); if (ret < 0) goto out; ret = valid_master_desc(new_master_desc, epayload->master_desc); if (ret < 0) goto out; new_epayload = encrypted_key_alloc(key, epayload->format, new_master_desc, epayload->datalen, NULL); if (IS_ERR(new_epayload)) { ret = PTR_ERR(new_epayload); goto out; } __ekey_init(new_epayload, epayload->format, new_master_desc, epayload->datalen); memcpy(new_epayload->iv, epayload->iv, ivsize); memcpy(new_epayload->payload_data, epayload->payload_data, epayload->payload_datalen); rcu_assign_keypointer(key, new_epayload); call_rcu(&epayload->rcu, encrypted_rcu_free); out: kfree_sensitive(buf); return ret; } /* * encrypted_read - format and copy out the encrypted data * * The resulting datablob format is: * <master-key name> <decrypted data length> <encrypted iv> <encrypted data> * * On success, return to userspace the encrypted key datablob size. 
*/ static long encrypted_read(const struct key *key, char *buffer, size_t buflen) { struct encrypted_key_payload *epayload; struct key *mkey; const u8 *master_key; size_t master_keylen; char derived_key[HASH_SIZE]; char *ascii_buf; size_t asciiblob_len; int ret; epayload = dereference_key_locked(key); /* returns the hex encoded iv, encrypted-data, and hmac as ascii */ asciiblob_len = epayload->datablob_len + ivsize + 1 + roundup(epayload->decrypted_datalen, blksize) + (HASH_SIZE * 2); if (!buffer || buflen < asciiblob_len) return asciiblob_len; mkey = request_master_key(epayload, &master_key, &master_keylen); if (IS_ERR(mkey)) return PTR_ERR(mkey); ret = get_derived_key(derived_key, ENC_KEY, master_key, master_keylen); if (ret < 0) goto out; ret = derived_key_encrypt(epayload, derived_key, sizeof derived_key); if (ret < 0) goto out; ret = datablob_hmac_append(epayload, master_key, master_keylen); if (ret < 0) goto out; ascii_buf = datablob_format(epayload, asciiblob_len); if (!ascii_buf) { ret = -ENOMEM; goto out; } up_read(&mkey->sem); key_put(mkey); memzero_explicit(derived_key, sizeof(derived_key)); memcpy(buffer, ascii_buf, asciiblob_len); kfree_sensitive(ascii_buf); return asciiblob_len; out: up_read(&mkey->sem); key_put(mkey); memzero_explicit(derived_key, sizeof(derived_key)); return ret; } /* * encrypted_destroy - clear and free the key's payload */ static void encrypted_destroy(struct key *key) { kfree_sensitive(key->payload.data[0]); } struct key_type key_type_encrypted = { .name = "encrypted", .instantiate = encrypted_instantiate, .update = encrypted_update, .destroy = encrypted_destroy, .describe = user_describe, .read = encrypted_read, }; EXPORT_SYMBOL_GPL(key_type_encrypted); static int __init init_encrypted(void) { int ret; hash_tfm = crypto_alloc_shash(hash_alg, 0, 0); if (IS_ERR(hash_tfm)) { pr_err("encrypted_key: can't allocate %s transform: %ld\n", hash_alg, PTR_ERR(hash_tfm)); return PTR_ERR(hash_tfm); } ret = aes_get_sizes(); if (ret < 0) goto out; ret = register_key_type(&key_type_encrypted); if (ret < 0) goto out; return 0; out: crypto_free_shash(hash_tfm); return ret; } static void __exit cleanup_encrypted(void) { crypto_free_shash(hash_tfm); unregister_key_type(&key_type_encrypted); } late_initcall(init_encrypted); module_exit(cleanup_encrypted); MODULE_DESCRIPTION("Encrypted key type"); MODULE_LICENSE("GPL"); |
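Encrypted keys are driven entirely through the key-management syscalls: the payload handed to add_key(2)/keyctl is the text datablob that datablob_parse() tokenizes above, and reading the key back returns the ascii blob built by encrypted_read(). A hedged userspace sketch, assuming a user-type master key named "kmk" already exists in the keyrings and linking against libkeyutils:

#include <stdio.h>
#include <string.h>
#include <keyutils.h>

int main(void)
{
	/* "new [<format>] <master-key name> <decrypted data length>" - the
	 * same datablob grammar handled by datablob_parse() above. */
	const char *payload = "new user:kmk 32";
	key_serial_t key;

	key = add_key("encrypted", "example-ekey", payload, strlen(payload),
		      KEY_SPEC_USER_KEYRING);
	if (key < 0) {
		perror("add_key");
		return 1;
	}
	printf("encrypted key id: %d\n", key);
	return 0;
}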
| 1 1 1 1 1 1 1 1 1 1 1 1 1 1 9 9 9 8 9 8 9 8 9 9 9 8 9 7 6 7 7 7 7 7 7 7 2 2 25 25 2 2 18 18 7 7 1 1 1 1 1 18 6 6 2 12 18 12 11 12 12 2 2 2 6 18 18 18 18 18 18 18 18 6 18 6 5 6 6 18 6 6 18 9 9 9 16 16 16 16 16 6 6 6 6 6 5 6 6 6 6 1 1 1 1 1 2 1 2 2 2 2 1 1 1 1 1 1 1 2 34 35 2 70 70 69 70 70 70 70 70 70 69 69 70 69 70 70 18 18 18 18 18 18 18 2 1 18 5 18 12 18 70 69 69 70 70 70 70 70 70 2 2 2 2 2 2 52 53 1 53 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
*/ #define pr_fmt(fmt) "MPTCP: " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/netdevice.h> #include <crypto/sha2.h> #include <crypto/utils.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> #include <net/protocol.h> #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include <net/ip6_route.h> #include <net/transp_v6.h> #endif #include <net/mptcp.h> #include "protocol.h" #include "mib.h" #include <trace/events/mptcp.h> #include <trace/events/sock.h> static void mptcp_subflow_ops_undo_override(struct sock *ssk); static void SUBFLOW_REQ_INC_STATS(struct request_sock *req, enum linux_mptcp_mib_field field) { MPTCP_INC_STATS(sock_net(req_to_sk(req)), field); } static void subflow_req_destructor(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); pr_debug("subflow_req=%p\n", subflow_req); if (subflow_req->msk) sock_put((struct sock *)subflow_req->msk); mptcp_token_destroy_request(req); } static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2, void *hmac) { u8 msg[8]; put_unaligned_be32(nonce1, &msg[0]); put_unaligned_be32(nonce2, &msg[4]); mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac); } static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk) { return mptcp_is_fully_established((void *)msk) && ((mptcp_pm_is_userspace(msk) && mptcp_userspace_pm_active(msk)) || READ_ONCE(msk->pm.accept_subflow)); } /* validate received token and create truncated hmac and nonce for SYN-ACK */ static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_req) { struct mptcp_sock *msk = subflow_req->msk; u8 hmac[SHA256_DIGEST_SIZE]; get_random_bytes(&subflow_req->local_nonce, sizeof(u32)); subflow_generate_hmac(READ_ONCE(msk->local_key), READ_ONCE(msk->remote_key), subflow_req->local_nonce, subflow_req->remote_nonce, hmac); subflow_req->thmac = get_unaligned_be64(hmac); } static struct mptcp_sock *subflow_token_join_request(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct mptcp_sock *msk; int local_id; msk = mptcp_token_get_sock(sock_net(req_to_sk(req)), subflow_req->token); if (!msk) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN); return NULL; } local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req); if (local_id < 0) { sock_put((struct sock *)msk); return NULL; } subflow_req->local_id = local_id; subflow_req->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)req); return msk; } static void subflow_init_req(struct request_sock *req, const struct sock *sk_listener) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); subflow_req->mp_capable = 0; subflow_req->mp_join = 0; subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener)); subflow_req->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk_listener)); subflow_req->msk = NULL; mptcp_token_init_request(req); } static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk) { return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport; } static void subflow_add_reset_reason(struct sk_buff *skb, u8 reason) { struct mptcp_ext *mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (mpext) { memset(mpext, 0, sizeof(*mpext)); mpext->reset_reason = reason; } } static int subflow_reset_req_endp(struct request_sock *req, struct sk_buff *skb) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEENDPATTEMPT); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); return -EPERM; } /* Init 
mptcp request socket. * * Returns an error code if a JOIN has failed and a TCP reset * should be sent. */ static int subflow_check_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb) { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct mptcp_options_received mp_opt; bool opt_mp_capable, opt_mp_join; pr_debug("subflow_req=%p, listener=%p\n", subflow_req, listener); #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of * TCP option space. */ if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info)) { subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP); return -EINVAL; } #endif mptcp_get_options(skb, &mp_opt); opt_mp_capable = !!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYN); opt_mp_join = !!(mp_opt.suboptions & OPTION_MPTCP_MPJ_SYN); if (opt_mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); if (unlikely(listener->pm_listener)) return subflow_reset_req_endp(req, skb); if (opt_mp_join) return 0; } else if (opt_mp_join) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX); if (mp_opt.backup) SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX); } else if (unlikely(listener->pm_listener)) { return subflow_reset_req_endp(req, skb); } if (opt_mp_capable && listener->request_mptcp) { int err, retries = MPTCP_TOKEN_MAX_RETRIES; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; again: do { get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key)); } while (subflow_req->local_key == 0); if (unlikely(req->syncookie)) { mptcp_crypto_key_sha(subflow_req->local_key, &subflow_req->token, &subflow_req->idsn); if (mptcp_token_exists(subflow_req->token)) { if (retries-- > 0) goto again; SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT); } else { subflow_req->mp_capable = 1; } return 0; } err = mptcp_token_new_request(req); if (err == 0) subflow_req->mp_capable = 1; else if (retries-- > 0) goto again; else SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT); } else if (opt_mp_join && listener->request_mptcp) { subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; subflow_req->mp_join = 1; subflow_req->backup = mp_opt.backup; subflow_req->remote_id = mp_opt.join_id; subflow_req->token = mp_opt.token; subflow_req->remote_nonce = mp_opt.nonce; subflow_req->msk = subflow_token_join_request(req); /* Can't fall back to TCP in this case. 
*/ if (!subflow_req->msk) { subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP); return -EPERM; } if (subflow_use_different_sport(subflow_req->msk, sk_listener)) { pr_debug("syn inet_sport=%d %d\n", ntohs(inet_sk(sk_listener)->inet_sport), ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport)); if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); return -EPERM; } SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTSYNRX); } subflow_req_create_thmac(subflow_req); if (unlikely(req->syncookie)) { if (!mptcp_can_accept_new_subflow(subflow_req->msk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINREJECTED); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); return -EPERM; } subflow_init_req_cookie_join_save(subflow_req, skb); } pr_debug("token=%u, remote_nonce=%u msk=%p\n", subflow_req->token, subflow_req->remote_nonce, subflow_req->msk); } return 0; } int mptcp_subflow_init_cookie_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb) { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct mptcp_options_received mp_opt; bool opt_mp_capable, opt_mp_join; int err; subflow_init_req(req, sk_listener); mptcp_get_options(skb, &mp_opt); opt_mp_capable = !!(mp_opt.suboptions & OPTION_MPTCP_MPC_ACK); opt_mp_join = !!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK); if (opt_mp_capable && opt_mp_join) return -EINVAL; if (opt_mp_capable && listener->request_mptcp) { if (mp_opt.sndr_key == 0) return -EINVAL; subflow_req->local_key = mp_opt.rcvr_key; err = mptcp_token_new_request(req); if (err) return err; subflow_req->mp_capable = 1; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1; } else if (opt_mp_join && listener->request_mptcp) { if (!mptcp_token_join_cookie_init_state(subflow_req, skb)) return -EINVAL; subflow_req->mp_join = 1; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1; } return 0; } EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req); static enum sk_rst_reason mptcp_get_rst_reason(const struct sk_buff *skb) { const struct mptcp_ext *mpext = mptcp_get_ext(skb); if (!mpext) return SK_RST_REASON_NOT_SPECIFIED; return sk_rst_convert_mptcp_reason(mpext->reset_reason); } static struct dst_entry *subflow_v4_route_req(const struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct request_sock *req, u32 tw_isn) { struct dst_entry *dst; int err; tcp_rsk(req)->is_mptcp = 1; subflow_init_req(req, sk); dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req, tw_isn); if (!dst) return NULL; err = subflow_check_req(req, sk, skb); if (err == 0) return dst; dst_release(dst); if (!req->syncookie) tcp_request_sock_ops.send_reset(sk, skb, mptcp_get_rst_reason(skb)); return NULL; } static void subflow_prep_synack(const struct sock *sk, struct request_sock *req, struct tcp_fastopen_cookie *foc, enum tcp_synack_type synack_type) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct inet_request_sock *ireq = inet_rsk(req); /* clear tstamp_ok, as needed depending on cookie */ if (foc && foc->len > -1) ireq->tstamp_ok = 0; if (synack_type == TCP_SYNACK_FASTOPEN) mptcp_fastopen_subflow_synack_set_params(subflow, req); } static int subflow_v4_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, enum tcp_synack_type synack_type, struct sk_buff *syn_skb) { subflow_prep_synack(sk, 
req, foc, synack_type); return tcp_request_sock_ipv4_ops.send_synack(sk, dst, fl, req, foc, synack_type, syn_skb); } #if IS_ENABLED(CONFIG_MPTCP_IPV6) static int subflow_v6_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, enum tcp_synack_type synack_type, struct sk_buff *syn_skb) { subflow_prep_synack(sk, req, foc, synack_type); return tcp_request_sock_ipv6_ops.send_synack(sk, dst, fl, req, foc, synack_type, syn_skb); } static struct dst_entry *subflow_v6_route_req(const struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct request_sock *req, u32 tw_isn) { struct dst_entry *dst; int err; tcp_rsk(req)->is_mptcp = 1; subflow_init_req(req, sk); dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req, tw_isn); if (!dst) return NULL; err = subflow_check_req(req, sk, skb); if (err == 0) return dst; dst_release(dst); if (!req->syncookie) tcp6_request_sock_ops.send_reset(sk, skb, mptcp_get_rst_reason(skb)); return NULL; } #endif /* validate received truncated hmac and create hmac for third ACK */ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow) { u8 hmac[SHA256_DIGEST_SIZE]; u64 thmac; subflow_generate_hmac(subflow->remote_key, subflow->local_key, subflow->remote_nonce, subflow->local_nonce, hmac); thmac = get_unaligned_be64(hmac); pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n", subflow, subflow->token, thmac, subflow->thmac); return thmac == subflow->thmac; } void mptcp_subflow_reset(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = subflow->conn; /* mptcp_mp_fail_no_response() can reach here on an already closed * socket */ if (ssk->sk_state == TCP_CLOSE) return; /* must hold: tcp_done() could drop last reference on parent */ sock_hold(sk); mptcp_send_active_reset_reason(ssk); tcp_done(ssk); if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags)) mptcp_schedule_work(sk); sock_put(sk); } static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct sock *sk) { return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport; } void __mptcp_sync_state(struct sock *sk, int state) { struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); struct sock *ssk = msk->first; subflow = mptcp_subflow_ctx(ssk); __mptcp_propagate_sndbuf(sk, ssk); if (!msk->rcvspace_init) mptcp_rcv_space_init(msk, ssk); if (sk->sk_state == TCP_SYN_SENT) { /* subflow->idsn is always available is TCP_SYN_SENT state, * even for the FASTOPEN scenarios */ WRITE_ONCE(msk->write_seq, subflow->idsn + 1); WRITE_ONCE(msk->snd_nxt, msk->write_seq); mptcp_set_state(sk, state); sk->sk_state_change(sk); } } static void subflow_set_remote_key(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt) { /* active MPC subflow will reach here multiple times: * at subflow_finish_connect() time and at 4th ack time */ if (subflow->remote_key_valid) return; subflow->remote_key_valid = 1; subflow->remote_key = mp_opt->sndr_key; mptcp_crypto_key_sha(subflow->remote_key, NULL, &subflow->iasn); subflow->iasn++; WRITE_ONCE(msk->remote_key, subflow->remote_key); WRITE_ONCE(msk->ack_seq, subflow->iasn); WRITE_ONCE(msk->can_ack, true); atomic64_set(&msk->rcv_wnd_sent, subflow->iasn); } static void mptcp_propagate_state(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt) { struct mptcp_sock *msk 
= mptcp_sk(sk); mptcp_data_lock(sk); if (mp_opt) { /* Options are available only in the non fallback cases * avoid updating rx path fields otherwise */ WRITE_ONCE(msk->snd_una, subflow->idsn + 1); WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd); subflow_set_remote_key(msk, subflow, mp_opt); } if (!sock_owned_by_user(sk)) { __mptcp_sync_state(sk, ssk->sk_state); } else { msk->pending_state = ssk->sk_state; __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); } mptcp_data_unlock(sk); } static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_options_received mp_opt; struct sock *parent = subflow->conn; struct mptcp_sock *msk; subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); /* be sure no special action on any packet other than syn-ack */ if (subflow->conn_finished) return; msk = mptcp_sk(parent); subflow->rel_write_seq = 1; subflow->conn_finished = 1; subflow->ssn_offset = TCP_SKB_CB(skb)->seq; pr_debug("subflow=%p synack seq=%x\n", subflow, subflow->ssn_offset); mptcp_get_options(skb, &mp_opt); if (subflow->request_mptcp) { if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); mptcp_do_fallback(sk); pr_fallback(msk); goto fallback; } if (mp_opt.suboptions & OPTION_MPTCP_CSUMREQD) WRITE_ONCE(msk->csum_enabled, true); if (mp_opt.deny_join_id0) WRITE_ONCE(msk->pm.remote_deny_join_id0, true); subflow->mp_capable = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); mptcp_active_enable(parent); mptcp_propagate_state(parent, sk, subflow, &mp_opt); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_SYNACK)) { subflow->reset_reason = MPTCP_RST_EMPTCP; goto do_reset; } subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; WRITE_ONCE(subflow->remote_id, mp_opt.join_id); pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d\n", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); if (!subflow_thmac_valid(subflow)) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); subflow->reset_reason = MPTCP_RST_EMPTCP; goto do_reset; } if (!mptcp_finish_join(sk)) goto do_reset; subflow_generate_hmac(subflow->local_key, subflow->remote_key, subflow->local_nonce, subflow->remote_nonce, hmac); memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN); subflow->mp_join = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); if (subflow->backup) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKBACKUPRX); if (subflow_use_different_dport(msk, sk)) { pr_debug("synack inet_dport=%d %d\n", ntohs(inet_sk(sk)->inet_dport), ntohs(inet_sk(parent)->inet_dport)); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX); } } else if (mptcp_check_fallback(sk)) { /* It looks like MPTCP is blocked, while TCP is not */ if (subflow->mpc_drop) mptcp_active_disable(parent); fallback: mptcp_propagate_state(parent, sk, subflow, NULL); } return; do_reset: subflow->reset_transient = 0; mptcp_subflow_reset(sk); } static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id) { WARN_ON_ONCE(local_id < 0 || local_id > 255); WRITE_ONCE(subflow->local_id, local_id); } static int subflow_chk_local_id(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); int err; if (likely(subflow->local_id >= 0)) return 0; err = 
mptcp_pm_get_local_id(msk, (struct sock_common *)sk); if (err < 0) return err; subflow_set_local_id(subflow, err); subflow->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)sk); return 0; } static int subflow_rebuild_header(struct sock *sk) { int err = subflow_chk_local_id(sk); if (unlikely(err < 0)) return err; return inet_sk_rebuild_header(sk); } #if IS_ENABLED(CONFIG_MPTCP_IPV6) static int subflow_v6_rebuild_header(struct sock *sk) { int err = subflow_chk_local_id(sk); if (unlikely(err < 0)) return err; return inet6_sk_rebuild_header(sk); } #endif static struct request_sock_ops mptcp_subflow_v4_request_sock_ops __ro_after_init; static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops __ro_after_init; static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); pr_debug("subflow=%p\n", subflow); /* Never answer to SYNs sent to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) goto drop; return tcp_conn_request(&mptcp_subflow_v4_request_sock_ops, &subflow_request_sock_ipv4_ops, sk, skb); drop: tcp_listendrop(sk); return 0; } static void subflow_v4_req_destructor(struct request_sock *req) { subflow_req_destructor(req); tcp_request_sock_ops.destructor(req); } #if IS_ENABLED(CONFIG_MPTCP_IPV6) static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init; static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init; static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init; static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init; static struct proto tcpv6_prot_override __ro_after_init; static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); pr_debug("subflow=%p\n", subflow); if (skb->protocol == htons(ETH_P_IP)) return subflow_v4_conn_request(sk, skb); if (!ipv6_unicast_destination(skb)) goto drop; if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); return 0; } return tcp_conn_request(&mptcp_subflow_v6_request_sock_ops, &subflow_request_sock_ipv6_ops, sk, skb); drop: tcp_listendrop(sk); return 0; /* don't send reset */ } static void subflow_v6_req_destructor(struct request_sock *req) { subflow_req_destructor(req); tcp6_request_sock_ops.destructor(req); } #endif struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, bool attach_listener) { if (ops->family == AF_INET) ops = &mptcp_subflow_v4_request_sock_ops; #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (ops->family == AF_INET6) ops = &mptcp_subflow_v6_request_sock_ops; #endif return inet_reqsk_alloc(ops, sk_listener, attach_listener); } EXPORT_SYMBOL(mptcp_subflow_reqsk_alloc); /* validate hmac received in third ACK */ static bool subflow_hmac_valid(const struct mptcp_subflow_request_sock *subflow_req, const struct mptcp_options_received *mp_opt) { struct mptcp_sock *msk = subflow_req->msk; u8 hmac[SHA256_DIGEST_SIZE]; subflow_generate_hmac(READ_ONCE(msk->remote_key), READ_ONCE(msk->local_key), subflow_req->remote_nonce, subflow_req->local_nonce, hmac); return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN); } static void subflow_ulp_fallback(struct sock *sk, struct mptcp_subflow_context *old_ctx) { struct inet_connection_sock *icsk = inet_csk(sk); mptcp_subflow_tcp_fallback(sk, old_ctx); icsk->icsk_ulp_ops = NULL; 
rcu_assign_pointer(icsk->icsk_ulp_data, NULL); tcp_sk(sk)->is_mptcp = 0; mptcp_subflow_ops_undo_override(sk); } void mptcp_subflow_drop_ctx(struct sock *ssk) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk); if (!ctx) return; list_del(&mptcp_subflow_ctx(ssk)->node); if (inet_csk(ssk)->icsk_ulp_ops) { subflow_ulp_fallback(ssk, ctx); if (ctx->conn) sock_put(ctx->conn); } kfree_rcu(ctx, rcu); } void __mptcp_subflow_fully_established(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt) { subflow_set_remote_key(msk, subflow, mp_opt); WRITE_ONCE(subflow->fully_established, true); WRITE_ONCE(msk->fully_established, true); } static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, bool *own_req) { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; struct mptcp_options_received mp_opt; bool fallback, fallback_is_fatal; enum sk_rst_reason reason; struct mptcp_sock *owner; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p\n", listener, req, listener->conn); /* After child creation we must look for MPC even when options * are not parsed */ mp_opt.suboptions = 0; /* hopefully temporary handling for MP_JOIN+syncookie */ subflow_req = mptcp_subflow_rsk(req); fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join; fallback = !tcp_rsk(req)->is_mptcp; if (fallback) goto create_child; /* if the sk is MP_CAPABLE, we try to fetch the client key */ if (subflow_req->mp_capable) { /* we can receive and accept an in-window, out-of-order pkt, * which may not carry the MP_CAPABLE opt even on mptcp enabled * paths: always try to extract the peer key, and fallback * for packets missing it. * Even OoO DSS packets coming legitly after dropped or * reordered MPC will cause fallback, but we don't have other * options. */ mptcp_get_options(skb, &mp_opt); if (!(mp_opt.suboptions & (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_ACK))) fallback = true; } else if (subflow_req->mp_join) { mptcp_get_options(skb, &mp_opt); if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK)) fallback = true; } create_child: child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash, own_req); if (child && *own_req) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child); tcp_rsk(req)->drop_req = false; /* we need to fallback on ctx allocation failure and on pre-reqs * checking above. In the latter scenario we additionally need * to reset the context to non MPTCP status. 
*/ if (!ctx || fallback) { if (fallback_is_fatal) { subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP); goto dispose_child; } goto fallback; } /* ssk inherits options of listener sk */ ctx->setsockopt_seq = listener->setsockopt_seq; if (ctx->mp_capable) { ctx->conn = mptcp_sk_clone_init(listener->conn, &mp_opt, child, req); if (!ctx->conn) goto fallback; ctx->subflow_id = 1; owner = mptcp_sk(ctx->conn); mptcp_pm_new_connection(owner, child, 1); /* with OoO packets we can reach here without ingress * mpc option */ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { mptcp_pm_fully_established(owner, child); ctx->pm_notified = 1; } } else if (ctx->mp_join) { owner = subflow_req->msk; if (!owner) { subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); goto dispose_child; } if (!subflow_hmac_valid(subflow_req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); goto dispose_child; } if (!mptcp_can_accept_new_subflow(owner)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINREJECTED); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); goto dispose_child; } /* move the msk reference ownership to the subflow */ subflow_req->msk = NULL; ctx->conn = (struct sock *)owner; if (subflow_use_different_sport(owner, sk)) { pr_debug("ack inet_sport=%d %d\n", ntohs(inet_sk(sk)->inet_sport), ntohs(inet_sk((struct sock *)owner)->inet_sport)); if (!mptcp_pm_sport_in_anno_list(owner, sk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); goto dispose_child; } SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX); } if (!mptcp_finish_join(child)) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(child); subflow_add_reset_reason(skb, subflow->reset_reason); goto dispose_child; } SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX); tcp_rsk(req)->drop_req = true; } } /* check for expected invariant - should never trigger, just help * catching earlier subtle bugs */ WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp && (!mptcp_subflow_ctx(child) || !mptcp_subflow_ctx(child)->conn)); return child; dispose_child: mptcp_subflow_drop_ctx(child); tcp_rsk(req)->drop_req = true; inet_csk_prepare_for_destroy_sock(child); tcp_done(child); reason = mptcp_get_rst_reason(skb); req->rsk_ops->send_reset(sk, skb, reason); /* The last child reference will be released by the caller */ return child; fallback: if (fallback) SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); mptcp_subflow_drop_ctx(child); return child; } static struct inet_connection_sock_af_ops subflow_specific __ro_after_init; static struct proto tcp_prot_override __ro_after_init; enum mapping_status { MAPPING_OK, MAPPING_INVALID, MAPPING_EMPTY, MAPPING_DATA_FIN, MAPPING_DUMMY, MAPPING_BAD_CSUM, MAPPING_NODSS }; static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) { pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d\n", ssn, subflow->map_subflow_seq, subflow->map_data_len); } static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); unsigned int skb_consumed; skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq; if (unlikely(skb_consumed >= skb->len)) { DEBUG_NET_WARN_ON_ONCE(1); return true; } return skb->len - skb_consumed <= subflow->map_data_len - mptcp_subflow_get_map_offset(subflow); } static bool validate_mapping(struct sock *ssk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); u32 
ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; if (unlikely(before(ssn, subflow->map_subflow_seq))) { /* Mapping covers data later in the subflow stream, * currently unsupported. */ dbg_bad_map(subflow, ssn); return false; } if (unlikely(!before(ssn, subflow->map_subflow_seq + subflow->map_data_len))) { /* Mapping does covers past subflow data, invalid */ dbg_bad_map(subflow, ssn); return false; } return true; } static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb, bool csum_reqd) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); u32 offset, seq, delta; __sum16 csum; int len; if (!csum_reqd) return MAPPING_OK; /* mapping already validated on previous traversal */ if (subflow->map_csum_len == subflow->map_data_len) return MAPPING_OK; /* traverse the receive queue, ensuring it contains a full * DSS mapping and accumulating the related csum. * Preserve the accoumlate csum across multiple calls, to compute * the csum only once */ delta = subflow->map_data_len - subflow->map_csum_len; for (;;) { seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len; offset = seq - TCP_SKB_CB(skb)->seq; /* if the current skb has not been accounted yet, csum its contents * up to the amount covered by the current DSS */ if (offset < skb->len) { __wsum csum; len = min(skb->len - offset, delta); csum = skb_checksum(skb, offset, len, 0); subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum, subflow->map_csum_len); delta -= len; subflow->map_csum_len += len; } if (delta == 0) break; if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) { /* if this subflow is closed, the partial mapping * will be never completed; flush the pending skbs, so * that subflow_sched_work_if_closed() can kick in */ if (unlikely(ssk->sk_state == TCP_CLOSE)) while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); /* not enough data to validate the csum */ return MAPPING_EMPTY; } /* the DSS mapping for next skbs will be validated later, * when a get_mapping_status call will process such skb */ skb = skb->next; } /* note that 'map_data_len' accounts only for the carried data, does * not include the eventual seq increment due to the data fin, * while the pseudo header requires the original DSS data len, * including that */ csum = __mptcp_make_csum(subflow->map_seq, subflow->map_subflow_seq, subflow->map_data_len + subflow->map_data_fin, subflow->map_data_csum); if (unlikely(csum)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); return MAPPING_BAD_CSUM; } subflow->valid_csum_seen = 1; return MAPPING_OK; } static enum mapping_status get_mapping_status(struct sock *ssk, struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); bool csum_reqd = READ_ONCE(msk->csum_enabled); struct mptcp_ext *mpext; struct sk_buff *skb; u16 data_len; u64 map_seq; skb = skb_peek(&ssk->sk_receive_queue); if (!skb) return MAPPING_EMPTY; if (mptcp_check_fallback(ssk)) return MAPPING_DUMMY; mpext = mptcp_get_ext(skb); if (!mpext || !mpext->use_map) { if (!subflow->map_valid && !skb->len) { /* the TCP stack deliver 0 len FIN pkt to the receive * queue, that is the only 0len pkts ever expected here, * and we can admit no mapping only for 0 len pkts */ if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) WARN_ONCE(1, "0len seq %d:%d flags %x", TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, TCP_SKB_CB(skb)->tcp_flags); sk_eat_skb(ssk, skb); return MAPPING_EMPTY; } /* If the required DSS has likely been dropped by a middlebox */ if (!subflow->map_valid) return 
MAPPING_NODSS; goto validate_seq; } trace_get_mapping_status(mpext); data_len = mpext->data_len; if (data_len == 0) { pr_debug("infinite mapping received\n"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); return MAPPING_INVALID; } if (mpext->data_fin == 1) { u64 data_fin_seq; if (data_len == 1) { bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq, mpext->dsn64); pr_debug("DATA_FIN with no payload seq=%llu\n", mpext->data_seq); if (subflow->map_valid) { /* A DATA_FIN might arrive in a DSS * option before the previous mapping * has been fully consumed. Continue * handling the existing mapping. */ skb_ext_del(skb, SKB_EXT_MPTCP); return MAPPING_OK; } if (updated) mptcp_schedule_work((struct sock *)msk); return MAPPING_DATA_FIN; } data_fin_seq = mpext->data_seq + data_len - 1; /* If mpext->data_seq is a 32-bit value, data_fin_seq must also * be limited to 32 bits. */ if (!mpext->dsn64) data_fin_seq &= GENMASK_ULL(31, 0); mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64); pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d\n", data_fin_seq, mpext->dsn64); /* Adjust for DATA_FIN using 1 byte of sequence space */ data_len--; } map_seq = mptcp_expand_seq(READ_ONCE(msk->ack_seq), mpext->data_seq, mpext->dsn64); WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64); if (subflow->map_valid) { /* Allow replacing only with an identical map */ if (subflow->map_seq == map_seq && subflow->map_subflow_seq == mpext->subflow_seq && subflow->map_data_len == data_len && subflow->map_csum_reqd == mpext->csum_reqd) { skb_ext_del(skb, SKB_EXT_MPTCP); goto validate_csum; } /* If this skb data are fully covered by the current mapping, * the new map would need caching, which is not supported */ if (skb_is_fully_mapped(ssk, skb)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH); return MAPPING_INVALID; } /* will validate the next map after consuming the current one */ goto validate_csum; } subflow->map_seq = map_seq; subflow->map_subflow_seq = mpext->subflow_seq; subflow->map_data_len = data_len; subflow->map_valid = 1; subflow->map_data_fin = mpext->data_fin; subflow->mpc_map = mpext->mpc_map; subflow->map_csum_reqd = mpext->csum_reqd; subflow->map_csum_len = 0; subflow->map_data_csum = csum_unfold(mpext->csum); /* Cfr RFC 8684 Section 3.3.0 */ if (unlikely(subflow->map_csum_reqd != csum_reqd)) return MAPPING_INVALID; pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u\n", subflow->map_seq, subflow->map_subflow_seq, subflow->map_data_len, subflow->map_csum_reqd, subflow->map_data_csum); validate_seq: /* we revalidate valid mapping on new skb, because we must ensure * the current skb is completely covered by the available mapping */ if (!validate_mapping(ssk, skb)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSTCPMISMATCH); return MAPPING_INVALID; } skb_ext_del(skb, SKB_EXT_MPTCP); validate_csum: return validate_data_csum(ssk, skb, csum_reqd); } static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, u64 limit) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; struct tcp_sock *tp = tcp_sk(ssk); u32 offset, incr, avail_len; offset = tp->copied_seq - TCP_SKB_CB(skb)->seq; if (WARN_ON_ONCE(offset > skb->len)) goto out; avail_len = skb->len - offset; incr = limit >= avail_len ? 
avail_len + fin : limit; pr_debug("discarding=%d len=%d offset=%d seq=%d\n", incr, skb->len, offset, subflow->map_subflow_seq); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA); tcp_sk(ssk)->copied_seq += incr; out: if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq)) sk_eat_skb(ssk, skb); if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) subflow->map_valid = 0; } static bool subflow_is_done(const struct sock *sk) { return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE; } /* sched mptcp worker for subflow cleanup if no more data is pending */ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk = (struct sock *)msk; if (likely(ssk->sk_state != TCP_CLOSE && (ssk->sk_state != TCP_CLOSE_WAIT || inet_sk_state_load(sk) != TCP_ESTABLISHED))) return; if (!skb_queue_empty(&ssk->sk_receive_queue)) return; if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) mptcp_schedule_work(sk); /* when the fallback subflow closes the rx side, trigger a 'dummy' * ingress data fin, so that the msk state will follow along */ if (__mptcp_check_fallback(msk) && subflow_is_done(ssk) && msk->first == ssk && mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true)) mptcp_schedule_work(sk); } static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); unsigned long fail_tout; /* graceful failure can happen only on the MPC subflow */ if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first))) return; /* since the close timeout take precedence on the fail one, * no need to start the latter when the first is already set */ if (sock_flag((struct sock *)msk, SOCK_DEAD)) return; /* we don't need extreme accuracy here, use a zero fail_tout as special * value meaning no fail timeout at all; */ fail_tout = jiffies + TCP_RTO_MAX; if (!fail_tout) fail_tout = 1; WRITE_ONCE(subflow->fail_tout, fail_tout); tcp_send_ack(ssk); mptcp_reset_tout_timer(msk, subflow->fail_tout); } static bool subflow_check_data_avail(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); enum mapping_status status; struct mptcp_sock *msk; struct sk_buff *skb; if (!skb_peek(&ssk->sk_receive_queue)) WRITE_ONCE(subflow->data_avail, false); if (subflow->data_avail) return true; msk = mptcp_sk(subflow->conn); for (;;) { u64 ack_seq; u64 old_ack; status = get_mapping_status(ssk, msk); trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue)); if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY || status == MAPPING_BAD_CSUM || status == MAPPING_NODSS)) goto fallback; if (status != MAPPING_OK) goto no_data; skb = skb_peek(&ssk->sk_receive_queue); if (WARN_ON_ONCE(!skb)) goto no_data; if (unlikely(!READ_ONCE(msk->can_ack))) goto fallback; old_ack = READ_ONCE(msk->ack_seq); ack_seq = mptcp_subflow_get_mapped_dsn(subflow); pr_debug("msk ack_seq=%llx subflow ack_seq=%llx\n", old_ack, ack_seq); if (unlikely(before64(ack_seq, old_ack))) { mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); continue; } WRITE_ONCE(subflow->data_avail, true); break; } return true; no_data: subflow_sched_work_if_closed(msk, ssk); return false; fallback: if (!__mptcp_check_fallback(msk)) { /* RFC 8684 section 3.7. 
*/ if (status == MAPPING_BAD_CSUM && (subflow->mp_join || subflow->valid_csum_seen)) { subflow->send_mp_fail = 1; if (!READ_ONCE(msk->allow_infinite_fallback)) { subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; goto reset; } mptcp_subflow_fail(msk, ssk); WRITE_ONCE(subflow->data_avail, true); return true; } if (!READ_ONCE(msk->allow_infinite_fallback)) { /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ subflow->reset_transient = 0; subflow->reset_reason = status == MAPPING_NODSS ? MPTCP_RST_EMIDDLEBOX : MPTCP_RST_EMPTCP; reset: WRITE_ONCE(ssk->sk_err, EBADMSG); tcp_set_state(ssk, TCP_CLOSE); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); mptcp_send_active_reset_reason(ssk); WRITE_ONCE(subflow->data_avail, false); return false; } mptcp_do_fallback(ssk); } skb = skb_peek(&ssk->sk_receive_queue); subflow->map_valid = 1; subflow->map_seq = READ_ONCE(msk->ack_seq); subflow->map_data_len = skb->len; subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; WRITE_ONCE(subflow->data_avail, true); return true; } bool mptcp_subflow_data_available(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); /* check if current mapping is still valid */ if (subflow->map_valid && mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) { subflow->map_valid = 0; WRITE_ONCE(subflow->data_avail, false); pr_debug("Done with mapping: seq=%u data_len=%u\n", subflow->map_subflow_seq, subflow->map_data_len); } return subflow_check_data_avail(sk); } /* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy, * not the ssk one. * * In mptcp, rwin is about the mptcp-level connection data. * * Data that is still on the ssk rx queue can thus be ignored, * as far as mptcp peer is concerned that data is still inflight. * DSS ACK is updated when skb is moved to the mptcp rx queue. */ void mptcp_space(const struct sock *ssk, int *space, int *full_space) { const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); const struct sock *sk = subflow->conn; *space = __mptcp_space(sk); *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } static void subflow_error_report(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn; /* bail early if this is a no-op, so that we avoid introducing a * problematic lockdep dependency between TCP accept queue lock * and msk socket spinlock */ if (!sk->sk_socket) return; mptcp_data_lock(sk); if (!sock_owned_by_user(sk)) __mptcp_error_report(sk); else __set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->cb_flags); mptcp_data_unlock(sk); } static void subflow_data_ready(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); u16 state = 1 << inet_sk_state_load(sk); struct sock *parent = subflow->conn; struct mptcp_sock *msk; trace_sk_data_ready(sk); msk = mptcp_sk(parent); if (state & TCPF_LISTEN) { /* MPJ subflow are removed from accept queue before reaching here, * avoid stray wakeups */ if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) return; parent->sk_data_ready(parent); return; } WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && !subflow->mp_join && !(state & TCPF_CLOSE)); if (mptcp_subflow_data_available(sk)) { mptcp_data_ready(parent, sk); /* subflow-level lowat test are not relevant. 
* respect the msk-level threshold eventually mandating an immediate ack */ if (mptcp_data_avail(msk) < parent->sk_rcvlowat && (tcp_sk(sk)->rcv_nxt - tcp_sk(sk)->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss) inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; } else if (unlikely(sk->sk_err)) { subflow_error_report(sk); } } static void subflow_write_space(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn; mptcp_propagate_sndbuf(sk, ssk); mptcp_write_space(sk); } static const struct inet_connection_sock_af_ops * subflow_default_af_ops(struct sock *sk) { #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (sk->sk_family == AF_INET6) return &subflow_v6_specific; #endif return &subflow_specific; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct inet_connection_sock *icsk = inet_csk(sk); const struct inet_connection_sock_af_ops *target; target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk); pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d\n", subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped); if (likely(icsk->icsk_af_ops == target)) return; subflow->icsk_af_ops = icsk->icsk_af_ops; icsk->icsk_af_ops = target; } #endif void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, unsigned short family) { memset(addr, 0, sizeof(*addr)); addr->ss_family = family; if (addr->ss_family == AF_INET) { struct sockaddr_in *in_addr = (struct sockaddr_in *)addr; if (info->family == AF_INET) in_addr->sin_addr = info->addr; #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (ipv6_addr_v4mapped(&info->addr6)) in_addr->sin_addr.s_addr = info->addr6.s6_addr32[3]; #endif in_addr->sin_port = info->port; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (addr->ss_family == AF_INET6) { struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr; if (info->family == AF_INET) ipv6_addr_set_v4mapped(info->addr.s_addr, &in6_addr->sin6_addr); else in6_addr->sin6_addr = info->addr6; in6_addr->sin6_port = info->port; } #endif } int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, const struct mptcp_addr_info *remote) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; int local_id = local->addr.id; struct sockaddr_storage addr; int remote_id = remote->id; int err = -ENOTCONN; struct socket *sf; struct sock *ssk; u32 remote_token; int addrlen; /* The userspace PM sent the request too early? 
*/ if (!mptcp_is_fully_established(sk)) goto err_out; err = mptcp_subflow_create_socket(sk, local->addr.family, &sf); if (err) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCREATSKERR); pr_debug("msk=%p local=%d remote=%d create sock error: %d\n", msk, local_id, remote_id, err); goto err_out; } ssk = sf->sk; subflow = mptcp_subflow_ctx(ssk); do { get_random_bytes(&subflow->local_nonce, sizeof(u32)); } while (!subflow->local_nonce); /* if 'IPADDRANY', the ID will be set later, after the routing */ if (local->addr.family == AF_INET) { if (!local->addr.addr.s_addr) local_id = -1; #if IS_ENABLED(CONFIG_MPTCP_IPV6) } else if (sk->sk_family == AF_INET6) { if (ipv6_addr_any(&local->addr.addr6)) local_id = -1; #endif } if (local_id >= 0) subflow_set_local_id(subflow, local_id); subflow->remote_key_valid = 1; subflow->remote_key = READ_ONCE(msk->remote_key); subflow->local_key = READ_ONCE(msk->local_key); subflow->token = msk->token; mptcp_info2sockaddr(&local->addr, &addr, ssk->sk_family); addrlen = sizeof(struct sockaddr_in); #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif ssk->sk_bound_dev_if = local->ifindex; err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); if (err) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXBINDERR); pr_debug("msk=%p local=%d remote=%d bind error: %d\n", msk, local_id, remote_id, err); goto failed; } mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL); pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d\n", msk, remote_token, local_id, remote_id); subflow->remote_token = remote_token; WRITE_ONCE(subflow->remote_id, remote_id); subflow->request_join = 1; subflow->request_bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP); subflow->subflow_id = msk->subflow_id++; mptcp_info2sockaddr(remote, &addr, ssk->sk_family); sock_hold(ssk); list_add_tail(&subflow->node, &msk->conn_list); err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); if (err && err != -EINPROGRESS) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCONNECTERR); pr_debug("msk=%p local=%d remote=%d connect error: %d\n", msk, local_id, remote_id, err); goto failed_unlink; } MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTX); /* discard the subflow socket */ mptcp_sock_graft(ssk, sk->sk_socket); iput(SOCK_INODE(sf)); WRITE_ONCE(msk->allow_infinite_fallback, false); mptcp_stop_tout_timer(sk); return 0; failed_unlink: list_del(&subflow->node); sock_put(mptcp_subflow_tcp_sock(subflow)); failed: subflow->disposable = 1; sock_release(sf); err_out: /* we account subflows before the creation, and this failures will not * be caught by sk_state_change() */ mptcp_pm_close_subflow(msk); return err; } static void mptcp_attach_cgroup(struct sock *parent, struct sock *child) { #ifdef CONFIG_SOCK_CGROUP_DATA struct sock_cgroup_data *parent_skcd = &parent->sk_cgrp_data, *child_skcd = &child->sk_cgrp_data; /* only the additional subflows created by kworkers have to be modified */ if (cgroup_id(sock_cgroup_ptr(parent_skcd)) != cgroup_id(sock_cgroup_ptr(child_skcd))) { #ifdef CONFIG_MEMCG struct mem_cgroup *memcg = parent->sk_memcg; mem_cgroup_sk_free(child); if (memcg && css_tryget(&memcg->css)) child->sk_memcg = memcg; #endif /* CONFIG_MEMCG */ cgroup_sk_free(child_skcd); *child_skcd = *parent_skcd; cgroup_sk_clone(child_skcd); } #endif /* CONFIG_SOCK_CGROUP_DATA */ } static void mptcp_subflow_ops_override(struct sock *ssk) { #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (ssk->sk_prot == &tcpv6_prot) ssk->sk_prot = 
&tcpv6_prot_override; else #endif ssk->sk_prot = &tcp_prot_override; } static void mptcp_subflow_ops_undo_override(struct sock *ssk) { #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (ssk->sk_prot == &tcpv6_prot_override) ssk->sk_prot = &tcpv6_prot; else #endif ssk->sk_prot = &tcp_prot; } int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, struct socket **new_sock) { struct mptcp_subflow_context *subflow; struct net *net = sock_net(sk); struct socket *sf; int err; /* un-accepted server sockets can reach here - on bad configuration * bail early to avoid greater trouble later */ if (unlikely(!sk->sk_socket)) return -EINVAL; err = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &sf); if (err) return err; lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING); err = security_mptcp_add_subflow(sk, sf->sk); if (err) goto err_free; /* the newly created socket has to be in the same cgroup as its parent */ mptcp_attach_cgroup(sk, sf->sk); /* kernel sockets do not by default acquire net ref, but TCP timer * needs it. * Update ns_tracker to current stack trace and refcounted tracker. */ sk_net_refcnt_upgrade(sf->sk); err = tcp_set_ulp(sf->sk, "mptcp"); if (err) goto err_free; mptcp_sockopt_sync_locked(mptcp_sk(sk), sf->sk); release_sock(sf->sk); /* the newly created socket really belongs to the owning MPTCP * socket, even if for additional subflows the allocation is performed * by a kernel workqueue. Adjust inode references, so that the * procfs/diag interfaces really show this one belonging to the correct * user. */ SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino; SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid; SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid; subflow = mptcp_subflow_ctx(sf->sk); pr_debug("subflow=%p\n", subflow); *new_sock = sf; sock_hold(sk); subflow->conn = sk; mptcp_subflow_ops_override(sf->sk); return 0; err_free: release_sock(sf->sk); sock_release(sf); return err; } static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, gfp_t priority) { struct inet_connection_sock *icsk = inet_csk(sk); struct mptcp_subflow_context *ctx; ctx = kzalloc(sizeof(*ctx), priority); if (!ctx) return NULL; rcu_assign_pointer(icsk->icsk_ulp_data, ctx); INIT_LIST_HEAD(&ctx->node); INIT_LIST_HEAD(&ctx->delegated_node); pr_debug("subflow=%p\n", ctx); ctx->tcp_sock = sk; WRITE_ONCE(ctx->local_id, -1); return ctx; } static void __subflow_state_change(struct sock *sk) { struct socket_wq *wq; rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); rcu_read_unlock(); } static void subflow_state_change(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct sock *parent = subflow->conn; struct mptcp_sock *msk; __subflow_state_change(sk); msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { mptcp_do_fallback(sk); pr_fallback(msk); subflow->conn_finished = 1; mptcp_propagate_state(parent, sk, subflow, NULL); } /* as recvmsg() does not acquire the subflow socket for ssk selection * a fin packet carrying a DSS can be unnoticed if we don't trigger * the data available machinery here. 
*/ if (mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); else if (unlikely(sk->sk_err)) subflow_error_report(sk); subflow_sched_work_if_closed(mptcp_sk(parent), sk); } void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) { struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; struct request_sock *req, *head, *tail; struct mptcp_subflow_context *subflow; struct sock *sk, *ssk; /* Due to lock dependencies no relevant lock can be acquired under rskq_lock. * Splice the req list, so that accept() can not reach the pending ssk after * the listener socket is released below. */ spin_lock_bh(&queue->rskq_lock); head = queue->rskq_accept_head; tail = queue->rskq_accept_tail; queue->rskq_accept_head = NULL; queue->rskq_accept_tail = NULL; spin_unlock_bh(&queue->rskq_lock); if (!head) return; /* can't acquire the msk socket lock under the subflow one, * or will cause ABBA deadlock */ release_sock(listener_ssk); for (req = head; req; req = req->dl_next) { ssk = req->sk; if (!sk_is_mptcp(ssk)) continue; subflow = mptcp_subflow_ctx(ssk); if (!subflow || !subflow->conn) continue; sk = subflow->conn; sock_hold(sk); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); __mptcp_unaccepted_force_close(sk); release_sock(sk); /* lockdep will report a false positive ABBA deadlock * between cancel_work_sync and the listener socket. * The involved locks belong to different sockets WRT * the existing AB chain. * Using a per socket key is problematic as key * deregistration requires process context and must be * performed at socket disposal time, in atomic * context. * Just tell lockdep to consider the listener socket * released here. */ mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_); mptcp_cancel_work(sk); mutex_acquire(&listener_sk->sk_lock.dep_map, 0, 0, _RET_IP_); sock_put(sk); } /* we are still under the listener msk socket lock */ lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING); /* restore the listener queue, to let the TCP code clean it up */ spin_lock_bh(&queue->rskq_lock); WARN_ON_ONCE(queue->rskq_accept_head); queue->rskq_accept_head = head; queue->rskq_accept_tail = tail; spin_unlock_bh(&queue->rskq_lock); } static int subflow_ulp_init(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct mptcp_subflow_context *ctx; struct tcp_sock *tp = tcp_sk(sk); int err = 0; /* disallow attaching ULP to a socket unless it has been * created with sock_create_kern() */ if (!sk->sk_kern_sock) { err = -EOPNOTSUPP; goto out; } ctx = subflow_create_ctx(sk, GFP_KERNEL); if (!ctx) { err = -ENOMEM; goto out; } pr_debug("subflow=%p, family=%d\n", ctx, sk->sk_family); tp->is_mptcp = 1; ctx->icsk_af_ops = icsk->icsk_af_ops; icsk->icsk_af_ops = subflow_default_af_ops(sk); ctx->tcp_state_change = sk->sk_state_change; ctx->tcp_error_report = sk->sk_error_report; WARN_ON_ONCE(sk->sk_data_ready != sock_def_readable); WARN_ON_ONCE(sk->sk_write_space != sk_stream_write_space); sk->sk_data_ready = subflow_data_ready; sk->sk_write_space = subflow_write_space; sk->sk_state_change = subflow_state_change; sk->sk_error_report = subflow_error_report; out: return err; } static void subflow_ulp_release(struct sock *ssk) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk); bool release = true; struct sock *sk; if (!ctx) return; sk = ctx->conn; if (sk) { /* if the msk has been orphaned, keep the ctx * alive, will be freed by __mptcp_close_ssk(), * when the subflow is still unaccepted */ release = ctx->disposable || list_empty(&ctx->node); /* 
inet_child_forget() does not call sk_state_change(), * explicitly trigger the socket close machinery */ if (!release && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags)) mptcp_schedule_work(sk); sock_put(sk); } mptcp_subflow_ops_undo_override(ssk); if (release) kfree_rcu(ctx, rcu); } static void subflow_ulp_clone(const struct request_sock *req, struct sock *newsk, const gfp_t priority) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk); struct mptcp_subflow_context *new_ctx; if (!tcp_rsk(req)->is_mptcp || (!subflow_req->mp_capable && !subflow_req->mp_join)) { subflow_ulp_fallback(newsk, old_ctx); return; } new_ctx = subflow_create_ctx(newsk, priority); if (!new_ctx) { subflow_ulp_fallback(newsk, old_ctx); return; } new_ctx->conn_finished = 1; new_ctx->icsk_af_ops = old_ctx->icsk_af_ops; new_ctx->tcp_state_change = old_ctx->tcp_state_change; new_ctx->tcp_error_report = old_ctx->tcp_error_report; new_ctx->rel_write_seq = 1; if (subflow_req->mp_capable) { /* see comments in subflow_syn_recv_sock(), MPTCP connection * is fully established only after we receive the remote key */ new_ctx->mp_capable = 1; new_ctx->local_key = subflow_req->local_key; new_ctx->token = subflow_req->token; new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->idsn = subflow_req->idsn; /* this is the first subflow, id is always 0 */ subflow_set_local_id(new_ctx, 0); } else if (subflow_req->mp_join) { new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->mp_join = 1; WRITE_ONCE(new_ctx->fully_established, true); new_ctx->remote_key_valid = 1; new_ctx->backup = subflow_req->backup; new_ctx->request_bkup = subflow_req->request_bkup; WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; /* the subflow req id is valid, fetched via subflow_check_req() * and subflow_token_join_request() */ subflow_set_local_id(new_ctx, subflow_req->local_id); } } static void tcp_release_cb_override(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); long status; /* process and clear all the pending actions, but leave the subflow into * the napi queue. To respect locking, only the same CPU that originated * the action can touch the list. mptcp_napi_poll will take care of it. */ status = set_mask_bits(&subflow->delegated_status, MPTCP_DELEGATE_ACTIONS_MASK, 0); if (status) mptcp_subflow_process_delegated(ssk, status); tcp_release_cb(ssk); } static int tcp_abort_override(struct sock *ssk, int err) { /* closing a listener subflow requires a great deal of care. 
* keep it simple and just prevent such operation */ if (inet_sk_state_load(ssk) == TCP_LISTEN) return -EINVAL; return tcp_abort(ssk, err); } static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = { .name = "mptcp", .owner = THIS_MODULE, .init = subflow_ulp_init, .release = subflow_ulp_release, .clone = subflow_ulp_clone, }; static int subflow_ops_init(struct request_sock_ops *subflow_ops) { subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock); subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name, subflow_ops->obj_size, 0, SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU, NULL); if (!subflow_ops->slab) return -ENOMEM; return 0; } void __init mptcp_subflow_init(void) { mptcp_subflow_v4_request_sock_ops = tcp_request_sock_ops; mptcp_subflow_v4_request_sock_ops.slab_name = "request_sock_subflow_v4"; mptcp_subflow_v4_request_sock_ops.destructor = subflow_v4_req_destructor; if (subflow_ops_init(&mptcp_subflow_v4_request_sock_ops) != 0) panic("MPTCP: failed to init subflow v4 request sock ops\n"); subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops; subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req; subflow_request_sock_ipv4_ops.send_synack = subflow_v4_send_synack; subflow_specific = ipv4_specific; subflow_specific.conn_request = subflow_v4_conn_request; subflow_specific.syn_recv_sock = subflow_syn_recv_sock; subflow_specific.sk_rx_dst_set = subflow_finish_connect; subflow_specific.rebuild_header = subflow_rebuild_header; tcp_prot_override = tcp_prot; tcp_prot_override.release_cb = tcp_release_cb_override; tcp_prot_override.diag_destroy = tcp_abort_override; #if IS_ENABLED(CONFIG_MPTCP_IPV6) /* In struct mptcp_subflow_request_sock, we assume the TCP request sock * structures for v4 and v6 have the same size. It should not changed in * the future but better to make sure to be warned if it is no longer * the case. */ BUILD_BUG_ON(sizeof(struct tcp_request_sock) != sizeof(struct tcp6_request_sock)); mptcp_subflow_v6_request_sock_ops = tcp6_request_sock_ops; mptcp_subflow_v6_request_sock_ops.slab_name = "request_sock_subflow_v6"; mptcp_subflow_v6_request_sock_ops.destructor = subflow_v6_req_destructor; if (subflow_ops_init(&mptcp_subflow_v6_request_sock_ops) != 0) panic("MPTCP: failed to init subflow v6 request sock ops\n"); subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req; subflow_request_sock_ipv6_ops.send_synack = subflow_v6_send_synack; subflow_v6_specific = ipv6_specific; subflow_v6_specific.conn_request = subflow_v6_conn_request; subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock; subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect; subflow_v6_specific.rebuild_header = subflow_v6_rebuild_header; subflow_v6m_specific = subflow_v6_specific; subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit; subflow_v6m_specific.send_check = ipv4_specific.send_check; subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len; subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced; subflow_v6m_specific.rebuild_header = subflow_rebuild_header; tcpv6_prot_override = tcpv6_prot; tcpv6_prot_override.release_cb = tcp_release_cb_override; tcpv6_prot_override.diag_destroy = tcp_abort_override; #endif mptcp_diag_subflow_init(&subflow_ulp_ops); if (tcp_register_ulp(&subflow_ulp_ops) != 0) panic("MPTCP: failed to register subflows to ULP\n"); } |
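mptcp_subflow_init() above never patches tcp_prot or ipv4_specific in place: it takes a structure copy (tcp_prot_override = tcp_prot, subflow_specific = ipv4_specific) and then swaps only the few callbacks MPTCP needs, so every other TCP socket keeps the stock function tables. Below is a minimal, self-contained sketch of that copy-then-override pattern in plain user-space C; the struct and function names are invented for illustration and are not the kernel's struct proto.

/* Illustrative sketch only: copy an ops table, override selected hooks. */
#include <stdio.h>

struct ops {
	void (*release_cb)(void *obj);
	int  (*abort)(void *obj, int err);
};

static void base_release_cb(void *obj) { printf("base release\n"); }
static int base_abort(void *obj, int err) { printf("base abort %d\n", err); return 0; }

/* the shared "template" table, analogous to tcp_prot */
static const struct ops base_ops = {
	.release_cb = base_release_cb,
	.abort = base_abort,
};

static void my_release_cb(void *obj)
{
	printf("override release\n");	/* extra work first ... */
	base_ops.release_cb(obj);	/* ... then the original behaviour */
}

/* private copy, analogous to tcp_prot_override */
static struct ops my_ops;

int main(void)
{
	my_ops = base_ops;			/* structure copy */
	my_ops.release_cb = my_release_cb;	/* override only what differs */

	my_ops.release_cb(NULL);
	my_ops.abort(NULL, 5);			/* untouched entries still reach the base */
	return 0;
}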
#ifndef LLC_CONN_H #define LLC_CONN_H /* * Copyright (c) 1997 by Procom Technology, Inc. * 2001, 2002 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <linux/timer.h> #include <net/llc_if.h> #include <net/sock.h> #include <linux/llc.h> #define LLC_EVENT 1 #define LLC_PACKET 2 #define LLC2_P_TIME 2 #define LLC2_ACK_TIME 1 #define LLC2_REJ_TIME 3 #define LLC2_BUSY_TIME 3 struct llc_timer { struct timer_list timer; unsigned long expire; /* timer expire time */ }; struct llc_sock { /* struct sock must be the first member of llc_sock */ struct sock sk; struct sockaddr_llc addr; /* address sock is bound to */ u8 state; /* state of connection */ struct llc_sap *sap; /* pointer to parent SAP */ struct llc_addr laddr; /* lsap/mac pair */ struct llc_addr daddr; /* dsap/mac pair */ struct net_device *dev; /* device to send to remote */ netdevice_tracker dev_tracker; u32 copied_seq; /* head of yet unread data */ u8 retry_count; /* number of retries */ u8 ack_must_be_send; u8 first_pdu_Ns; u8 npta; struct llc_timer ack_timer; struct llc_timer pf_cycle_timer; struct llc_timer rej_sent_timer; struct llc_timer busy_state_timer; /* ind busy clr at remote LLC */ u8 vS; /* seq# next in-seq I-PDU tx'd*/ u8 vR; /* seq# next in-seq I-PDU rx'd*/ u32 n2; /* max nbr re-tx's for timeout*/ u32 n1; /* max nbr octets in I PDU */ u8 k; /* tx window size; max = 127 */ u8 rw; /* rx window size; max = 127 */ u8 p_flag; /* state flags */ u8 f_flag; u8 s_flag; u8 data_flag; u8 remote_busy_flag; u8 cause_flag; struct sk_buff_head pdu_unack_q; /* PDUs sent/waiting ack */ u16 link; /* network layer link number */ u8 X; /* a temporary variable */ u8 ack_pf; /* this flag indicates the P-bit of the acknowledge */ u8 failed_data_req; /* recognize that a failed llc_data_req_handler already exists (tx_buffer_full or unacceptable state) */ u8 dec_step; u8 inc_cntr; u8 dec_cntr; u8 connect_step; u8 last_nr; /* NR of last pdu received */ u32 rx_pdu_hdr; /* used for saving header of last pdu received and caused sending FRMR.
Used for resending FRMR */ u32 cmsg_flags; struct hlist_node dev_hash_node; }; static inline struct llc_sock *llc_sk(const struct sock *sk) { return (struct llc_sock *)sk; } static __inline__ void llc_set_backlog_type(struct sk_buff *skb, char type) { skb->cb[sizeof(skb->cb) - 1] = type; } static __inline__ char llc_backlog_type(struct sk_buff *skb) { return skb->cb[sizeof(skb->cb) - 1]; } struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void llc_sk_stop_all_timers(struct sock *sk, bool sync); void llc_sk_free(struct sock *sk); void llc_sk_reset(struct sock *sk); /* Access to a connection */ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb); void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit); void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit); int llc_conn_remove_acked_pdus(struct sock *conn, u8 nr, u16 *how_many_unacked); struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr, struct llc_addr *laddr, const struct net *net); void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk); void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk); u8 llc_data_accept_state(u8 state); void llc_build_offset_table(void); #endif /* LLC_CONN_H */ |
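The llc_sk() helper above works as a bare pointer cast only because struct sock is the first member of struct llc_sock, so a pointer to the socket and a pointer to its containing LLC state share the same address. A small stand-alone sketch of that embedding idiom follows (user-space C with invented names, not the real LLC structures).

/* Illustrative sketch only: first-member embedding makes the cast valid. */
#include <assert.h>
#include <stdio.h>

struct base {			/* stands in for struct sock */
	int state;
};

struct derived {		/* stands in for struct llc_sock */
	struct base b;		/* must remain the first member */
	int retry_count;
};

static inline struct derived *to_derived(struct base *b)
{
	return (struct derived *)b;	/* same trick as llc_sk() */
}

int main(void)
{
	struct derived d = { .b = { .state = 1 }, .retry_count = 3 };
	struct base *b = &d.b;		/* callers only ever see the base pointer */

	assert((void *)b == (void *)&d);
	printf("retry_count=%d\n", to_derived(b)->retry_count);
	return 0;
}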
// SPDX-License-Identifier: GPL-2.0-or-later #include <linux/seq_file.h> #include <net/ip.h> #include <net/mptcp.h> #include <net/snmp.h> #include <net/net_namespace.h> #include "mib.h" static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPCapableSYNRX", MPTCP_MIB_MPCAPABLEPASSIVE), SNMP_MIB_ITEM("MPCapableSYNTX", MPTCP_MIB_MPCAPABLEACTIVE), SNMP_MIB_ITEM("MPCapableSYNACKRX", MPTCP_MIB_MPCAPABLEACTIVEACK), SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK), SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK), SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP), SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED), SNMP_MIB_ITEM("MPCapableEndpAttempt", MPTCP_MIB_MPCAPABLEENDPATTEMPT), SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT), SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX), SNMP_MIB_ITEM("MPJoinSynBackupRx", MPTCP_MIB_JOINSYNBACKUPRX), SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX), SNMP_MIB_ITEM("MPJoinSynAckBackupRx", MPTCP_MIB_JOINSYNACKBACKUPRX), SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), SNMP_MIB_ITEM("MPJoinRejected", MPTCP_MIB_JOINREJECTED), SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX), SNMP_MIB_ITEM("MPJoinSynTxCreatSkErr", MPTCP_MIB_JOINSYNTXCREATSKERR), SNMP_MIB_ITEM("MPJoinSynTxBindErr", MPTCP_MIB_JOINSYNTXBINDERR), SNMP_MIB_ITEM("MPJoinSynTxConnectErr", MPTCP_MIB_JOINSYNTXCONNECTERR), SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), SNMP_MIB_ITEM("DSSCorruptionFallback", MPTCP_MIB_DSSCORRUPTIONFALLBACK), SNMP_MIB_ITEM("DSSCorruptionReset", MPTCP_MIB_DSSCORRUPTIONRESET), SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX), SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH), SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR), SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL), SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE), SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE), SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW), SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA), SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR), SNMP_MIB_ITEM("AddAddrTx", MPTCP_MIB_ADDADDRTX), SNMP_MIB_ITEM("AddAddrTxDrop", MPTCP_MIB_ADDADDRTXDROP), SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD), SNMP_MIB_ITEM("EchoAddTx", MPTCP_MIB_ECHOADDTX), SNMP_MIB_ITEM("EchoAddTxDrop", MPTCP_MIB_ECHOADDTXDROP), SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD), SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP), SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX), SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX), SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX), SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX), SNMP_MIB_ITEM("MismatchPortAckRx",
MPTCP_MIB_MISMATCHPORTACKRX), SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR), SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP), SNMP_MIB_ITEM("RmAddrTx", MPTCP_MIB_RMADDRTX), SNMP_MIB_ITEM("RmAddrTxDrop", MPTCP_MIB_RMADDRTXDROP), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX), SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX), SNMP_MIB_ITEM("MPFastcloseTx", MPTCP_MIB_MPFASTCLOSETX), SNMP_MIB_ITEM("MPFastcloseRx", MPTCP_MIB_MPFASTCLOSERX), SNMP_MIB_ITEM("MPRstTx", MPTCP_MIB_MPRSTTX), SNMP_MIB_ITEM("MPRstRx", MPTCP_MIB_MPRSTRX), SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE), SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER), SNMP_MIB_ITEM("SndWndShared", MPTCP_MIB_SNDWNDSHARED), SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED), SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE), SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT), SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB), SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE), SNMP_MIB_SENTINEL }; /* mptcp_mib_alloc - allocate percpu mib counters * * These are allocated when the first mptcp socket is created so * we do not waste percpu memory if mptcp isn't in use. */ bool mptcp_mib_alloc(struct net *net) { struct mptcp_mib __percpu *mib = alloc_percpu(struct mptcp_mib); if (!mib) return false; if (cmpxchg(&net->mib.mptcp_statistics, NULL, mib)) free_percpu(mib); return true; } void mptcp_seq_show(struct seq_file *seq) { unsigned long sum[ARRAY_SIZE(mptcp_snmp_list) - 1]; struct net *net = seq->private; int i; seq_puts(seq, "MPTcpExt:"); for (i = 0; mptcp_snmp_list[i].name; i++) seq_printf(seq, " %s", mptcp_snmp_list[i].name); seq_puts(seq, "\nMPTcpExt:"); memset(sum, 0, sizeof(sum)); if (net->mib.mptcp_statistics) snmp_get_cpu_field_batch(sum, mptcp_snmp_list, net->mib.mptcp_statistics); for (i = 0; mptcp_snmp_list[i].name; i++) seq_printf(seq, " %lu", sum[i]); seq_putc(seq, '\n'); } |
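mptcp_mib_alloc() above defers the percpu counter allocation until the first MPTCP socket is created and settles the resulting race with a single cmpxchg(): whichever caller loses simply frees its own allocation and keeps using the winner's. The sketch below shows the same lose-the-race-then-free pattern with C11 atomics in user-space C; the names are invented, and the kernel code itself keeps using cmpxchg()/free_percpu().

/* Illustrative sketch only: race-free lazy allocation via compare-and-swap. */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct counters { long rx, tx; };

static _Atomic(struct counters *) shared_counters;

static int counters_alloc(void)
{
	struct counters *expected = NULL;
	struct counters *mine = calloc(1, sizeof(*mine));

	if (!mine)
		return 0;

	/* Install our allocation only if nobody beat us to it; otherwise
	 * drop it and keep the winner's copy, like the cmpxchg() +
	 * free_percpu() pair in mptcp_mib_alloc().
	 */
	if (!atomic_compare_exchange_strong(&shared_counters, &expected, mine))
		free(mine);

	return 1;
}

int main(void)
{
	counters_alloc();
	counters_alloc();	/* the second call must neither leak nor replace the first */
	printf("counters at %p\n", (void *)atomic_load(&shared_counters));
	return 0;
}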
// SPDX-License-Identifier: GPL-2.0-or-later /* * xfrm_device.c - IPsec device offloading code.
* * Copyright (c) 2015 secunet Security Networks AG * * Author: * Steffen Klassert <steffen.klassert@secunet.com> */ #include <linux/errno.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <net/dst.h> #include <net/gso.h> #include <net/xfrm.h> #include <linux/notifier.h> #ifdef CONFIG_XFRM_OFFLOAD static void __xfrm_transport_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); skb_reset_mac_len(skb); if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header -= x->props.header_len; pskb_pull(skb, skb_transport_offset(skb) + x->props.header_len); } static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header = skb->network_header + hsize; skb_reset_mac_len(skb); pskb_pull(skb, skb->mac_len + x->props.header_len - x->props.enc_hdr_len); } static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); int phlen = 0; if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header = skb->network_header + hsize; skb_reset_mac_len(skb); if (x->sel.family != AF_INET6) { phlen = IPV4_BEET_PHMAXLEN; if (x->outer_mode.family == AF_INET6) phlen += sizeof(struct ipv6hdr) - sizeof(struct iphdr); } pskb_pull(skb, skb->mac_len + hsize + (x->props.header_len - phlen)); } /* Adjust pointers into the packet when IPsec is done at layer2 */ static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) { switch (x->outer_mode.encap) { case XFRM_MODE_IPTFS: case XFRM_MODE_TUNNEL: if (x->outer_mode.family == AF_INET) return __xfrm_mode_tunnel_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_mode_tunnel_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_TRANSPORT: if (x->outer_mode.family == AF_INET) return __xfrm_transport_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_transport_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_BEET: if (x->outer_mode.family == AF_INET) return __xfrm_mode_beet_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_mode_beet_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_IN_TRIGGER: break; } } static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb) { struct xfrm_offload *xo = xfrm_offload(skb); __u32 seq = xo->seq.low; seq += skb_shinfo(skb)->gso_segs; if (unlikely(seq < xo->seq.low)) return true; return false; } struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) { int err; unsigned long flags; struct xfrm_state *x; struct softnet_data *sd; struct sk_buff *skb2, *nskb, *pskb = NULL; netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); struct net_device *dev = skb->dev; struct sec_path *sp; if (!xo || (xo->flags & XFRM_XMIT)) return skb; if (!(features & NETIF_F_HW_ESP)) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); sp = skb_sec_path(skb); x = sp->xvec[sp->len - 1]; if (xo->flags & XFRM_GRO || x->xso.dir == XFRM_DEV_OFFLOAD_IN) return skb; /* The packet was sent to HW IPsec packet offload engine, * but to wrong device. Drop the packet, so it won't skip * XFRM stack. 
*/ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->xso.dev != dev) { kfree_skb(skb); dev_core_stats_tx_dropped_inc(dev); return NULL; } local_irq_save(flags); sd = this_cpu_ptr(&softnet_data); err = !skb_queue_empty(&sd->xfrm_backlog); local_irq_restore(flags); if (err) { *again = true; return skb; } if (skb_is_gso(skb) && unlikely(xmit_xfrm_check_overflow(skb))) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. */ esp_features = esp_features & ~(NETIF_F_HW_ESP | NETIF_F_GSO_ESP); segs = skb_gso_segment(skb, esp_features); if (IS_ERR(segs)) { kfree_skb(skb); dev_core_stats_tx_dropped_inc(dev); return NULL; } else { consume_skb(skb); skb = segs; } } if (!skb->next) { esp_features |= skb->dev->gso_partial_features; xfrm_outer_mode_prep(x, skb); xo->flags |= XFRM_DEV_RESUME; err = x->type_offload->xmit(x, skb, esp_features); if (err) { if (err == -EINPROGRESS) return NULL; XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); kfree_skb(skb); return NULL; } skb_push(skb, skb->data - skb_mac_header(skb)); return skb; } skb_list_walk_safe(skb, skb2, nskb) { esp_features |= skb->dev->gso_partial_features; skb_mark_not_on_list(skb2); xo = xfrm_offload(skb2); xo->flags |= XFRM_DEV_RESUME; xfrm_outer_mode_prep(x, skb2); err = x->type_offload->xmit(x, skb2, esp_features); if (!err) { skb2->next = nskb; } else if (err != -EINPROGRESS) { XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); skb2->next = nskb; kfree_skb_list(skb2); return NULL; } else { if (skb == skb2) skb = nskb; else pskb->next = nskb; continue; } skb_push(skb2, skb2->data - skb_mac_header(skb2)); pskb = skb2; } return skb; } EXPORT_SYMBOL_GPL(validate_xmit_xfrm); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo, struct netlink_ext_ack *extack) { int err; struct dst_entry *dst; struct net_device *dev; struct xfrm_dev_offload *xso = &x->xso; xfrm_address_t *saddr; xfrm_address_t *daddr; bool is_packet_offload; if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND | XFRM_OFFLOAD_PACKET)) { NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request"); return -EINVAL; } if ((xuo->flags & XFRM_OFFLOAD_INBOUND && x->dir == XFRM_SA_DIR_OUT) || (!(xuo->flags & XFRM_OFFLOAD_INBOUND) && x->dir == XFRM_SA_DIR_IN)) { NL_SET_ERR_MSG(extack, "Mismatched SA and offload direction"); return -EINVAL; } if (xuo->flags & XFRM_OFFLOAD_INBOUND && x->if_id) { NL_SET_ERR_MSG(extack, "XFRM if_id is not supported in RX path"); return -EINVAL; } is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET; /* We don't yet support TFC padding. */ if (x->tfcpad) { NL_SET_ERR_MSG(extack, "TFC padding can't be offloaded"); return -EINVAL; } dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { struct xfrm_dst_lookup_params params; if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { saddr = &x->props.saddr; daddr = &x->id.daddr; } else { saddr = &x->id.daddr; daddr = &x->props.saddr; } memset(&params, 0, sizeof(params)); params.net = net; params.saddr = saddr; params.daddr = daddr; params.mark = xfrm_smark_get(0, x); dst = __xfrm_dst_lookup(x->props.family, &params); if (IS_ERR(dst)) return (is_packet_offload) ? -EINVAL : 0; dev = dst->dev; dev_hold(dev); dst_release(dst); } if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) { xso->dev = NULL; dev_put(dev); return (is_packet_offload) ?
-EINVAL : 0; } if (!is_packet_offload && x->props.flags & XFRM_STATE_ESN && !dev->xfrmdev_ops->xdo_dev_state_advance_esn) { NL_SET_ERR_MSG(extack, "Device doesn't support offload with ESN"); xso->dev = NULL; dev_put(dev); return -EINVAL; } xfrm_set_type_offload(x); if (!x->type_offload) { NL_SET_ERR_MSG(extack, "Type doesn't support offload"); dev_put(dev); return -EINVAL; } xso->dev = dev; netdev_tracker_alloc(dev, &xso->dev_tracker, GFP_ATOMIC); if (xuo->flags & XFRM_OFFLOAD_INBOUND) xso->dir = XFRM_DEV_OFFLOAD_IN; else xso->dir = XFRM_DEV_OFFLOAD_OUT; if (is_packet_offload) xso->type = XFRM_DEV_OFFLOAD_PACKET; else xso->type = XFRM_DEV_OFFLOAD_CRYPTO; err = dev->xfrmdev_ops->xdo_dev_state_add(dev, x, extack); if (err) { xso->dev = NULL; xso->dir = 0; netdev_put(dev, &xso->dev_tracker); xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; xfrm_unset_type_offload(x); /* User explicitly requested packet offload mode and configured * policy in addition to the XFRM state. So be civil to users, * and return an error instead of taking fallback path. */ if ((err != -EOPNOTSUPP && !is_packet_offload) || is_packet_offload) { NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this state"); return err; } } return 0; } EXPORT_SYMBOL_GPL(xfrm_dev_state_add); int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, struct xfrm_user_offload *xuo, u8 dir, struct netlink_ext_ack *extack) { struct xfrm_dev_offload *xdo = &xp->xdo; struct net_device *dev; int err; if (!xuo->flags || xuo->flags & ~XFRM_OFFLOAD_PACKET) { /* We support only packet offload mode and it means * that user must set XFRM_OFFLOAD_PACKET bit. */ NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request"); return -EINVAL; } dev = dev_get_by_index(net, xuo->ifindex); if (!dev) return -EINVAL; if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_policy_add) { xdo->dev = NULL; dev_put(dev); NL_SET_ERR_MSG(extack, "Policy offload is not supported"); return -EINVAL; } xdo->dev = dev; netdev_tracker_alloc(dev, &xdo->dev_tracker, GFP_ATOMIC); xdo->type = XFRM_DEV_OFFLOAD_PACKET; switch (dir) { case XFRM_POLICY_IN: xdo->dir = XFRM_DEV_OFFLOAD_IN; break; case XFRM_POLICY_OUT: xdo->dir = XFRM_DEV_OFFLOAD_OUT; break; case XFRM_POLICY_FWD: xdo->dir = XFRM_DEV_OFFLOAD_FWD; break; default: xdo->dev = NULL; netdev_put(dev, &xdo->dev_tracker); NL_SET_ERR_MSG(extack, "Unrecognized offload direction"); return -EINVAL; } err = dev->xfrmdev_ops->xdo_dev_policy_add(xp, extack); if (err) { xdo->dev = NULL; xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; xdo->dir = 0; netdev_put(dev, &xdo->dev_tracker); NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this policy"); return err; } return 0; } EXPORT_SYMBOL_GPL(xfrm_dev_policy_add); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) { int mtu; struct dst_entry *dst = skb_dst(skb); struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct net_device *dev = x->xso.dev; bool check_tunnel_size; if (x->xso.type == XFRM_DEV_OFFLOAD_UNSPECIFIED) return false; if ((dev == xfrm_dst_path(dst)->dev) && !xdst->child->xfrm) { mtu = xfrm_state_mtu(x, xdst->child_mtu_cached); if (skb->len <= mtu) goto ok; if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) goto ok; } return false; ok: check_tunnel_size = x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->props.mode == XFRM_MODE_TUNNEL; switch (x->props.family) { case AF_INET: /* Check for IPv4 options */ if (ip_hdr(skb)->ihl != 5) return false; if (check_tunnel_size && xfrm4_tunnel_check_size(skb)) return false; break; case AF_INET6: /* Check for IPv6 
extensions */ if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) return false; if (check_tunnel_size && xfrm6_tunnel_check_size(skb)) return false; break; default: break; } if (dev->xfrmdev_ops->xdo_dev_offload_ok) return dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x); return true; } EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); void xfrm_dev_resume(struct sk_buff *skb) { struct net_device *dev = skb->dev; int ret = NETDEV_TX_BUSY; struct netdev_queue *txq; struct softnet_data *sd; unsigned long flags; rcu_read_lock(); txq = netdev_core_pick_tx(dev, skb, NULL); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); if (!dev_xmit_complete(ret)) { local_irq_save(flags); sd = this_cpu_ptr(&softnet_data); skb_queue_tail(&sd->xfrm_backlog, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(xfrm_dev_resume); void xfrm_dev_backlog(struct softnet_data *sd) { struct sk_buff_head *xfrm_backlog = &sd->xfrm_backlog; struct sk_buff_head list; struct sk_buff *skb; if (skb_queue_empty(xfrm_backlog)) return; __skb_queue_head_init(&list); spin_lock(&xfrm_backlog->lock); skb_queue_splice_init(xfrm_backlog, &list); spin_unlock(&xfrm_backlog->lock); while (!skb_queue_empty(&list)) { skb = __skb_dequeue(&list); xfrm_dev_resume(skb); } } #endif static int xfrm_api_check(struct net_device *dev) { #ifdef CONFIG_XFRM_OFFLOAD if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && !(dev->features & NETIF_F_HW_ESP)) return NOTIFY_BAD; if ((dev->features & NETIF_F_HW_ESP) && (!(dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_state_add && dev->xfrmdev_ops->xdo_dev_state_delete))) return NOTIFY_BAD; #else if (dev->features & (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM)) return NOTIFY_BAD; #endif return NOTIFY_DONE; } static int xfrm_dev_down(struct net_device *dev) { if (dev->features & NETIF_F_HW_ESP) { xfrm_dev_state_flush(dev_net(dev), dev, true); xfrm_dev_policy_flush(dev_net(dev), dev, true); } return NOTIFY_DONE; } static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_REGISTER: return xfrm_api_check(dev); case NETDEV_FEAT_CHANGE: return xfrm_api_check(dev); case NETDEV_DOWN: case NETDEV_UNREGISTER: return xfrm_dev_down(dev); } return NOTIFY_DONE; } static struct notifier_block xfrm_dev_notifier = { .notifier_call = xfrm_dev_event, }; void __init xfrm_dev_init(void) { register_netdevice_notifier(&xfrm_dev_notifier); } |
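One easy-to-miss detail above is xmit_xfrm_check_overflow(): before handing a GSO skb to the hardware, validate_xmit_xfrm() checks whether the segments would wrap the low 32 bits of the ESP sequence number, and if so it clears the ESP offload features and segments the packet in software instead. The wrap test is the classic unsigned-overflow check, shown in isolation below (user-space C, invented names).

/* Illustrative sketch only: detect 32-bit sequence-number wrap. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool seq_would_wrap(uint32_t seq_low, uint32_t nsegs)
{
	uint32_t end = seq_low + nsegs;	/* unsigned arithmetic wraps mod 2^32 */

	return end < seq_low;		/* wrapped iff the sum went "backwards" */
}

int main(void)
{
	printf("%d\n", seq_would_wrap(100, 10));		/* 0: no wrap */
	printf("%d\n", seq_would_wrap(0xfffffff0u, 0x20));	/* 1: wraps */
	return 0;
}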
// SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2010-2011 EIA Electronics, // Kurt Van Dijck <kurt.van.dijck@eia.be> // Copyright (c) 2018 Protonic, // Robin van der Gracht <robin@protonic.nl> // Copyright (c) 2017-2019 Pengutronix, // Marc Kleine-Budde <kernel@pengutronix.de> // Copyright (c) 2017-2019 Pengutronix, // Oleksij Rempel <kernel@pengutronix.de> #include <linux/can/skb.h> #include "j1939-priv.h" #define J1939_XTP_TX_RETRY_LIMIT 100 #define J1939_ETP_PGN_CTL 0xc800 #define J1939_ETP_PGN_DAT 0xc700 #define J1939_TP_PGN_CTL 0xec00 #define J1939_TP_PGN_DAT 0xeb00 #define J1939_TP_CMD_RTS 0x10 #define J1939_TP_CMD_CTS 0x11
#define J1939_TP_CMD_EOMA 0x13 #define J1939_TP_CMD_BAM 0x20 #define J1939_TP_CMD_ABORT 0xff #define J1939_ETP_CMD_RTS 0x14 #define J1939_ETP_CMD_CTS 0x15 #define J1939_ETP_CMD_DPO 0x16 #define J1939_ETP_CMD_EOMA 0x17 #define J1939_ETP_CMD_ABORT 0xff enum j1939_xtp_abort { J1939_XTP_NO_ABORT = 0, J1939_XTP_ABORT_BUSY = 1, /* Already in one or more connection managed sessions and * cannot support another. * * EALREADY: * Operation already in progress */ J1939_XTP_ABORT_RESOURCE = 2, /* System resources were needed for another task so this * connection managed session was terminated. * * EMSGSIZE: * The socket type requires that message be sent atomically, * and the size of the message to be sent made this * impossible. */ J1939_XTP_ABORT_TIMEOUT = 3, /* A timeout occurred and this is the connection abort to * close the session. * * EHOSTUNREACH: * The destination host cannot be reached (probably because * the host is down or a remote router cannot reach it). */ J1939_XTP_ABORT_GENERIC = 4, /* CTS messages received when data transfer is in progress * * EBADMSG: * Not a data message */ J1939_XTP_ABORT_FAULT = 5, /* Maximal retransmit request limit reached * * ENOTRECOVERABLE: * State not recoverable */ J1939_XTP_ABORT_UNEXPECTED_DATA = 6, /* Unexpected data transfer packet * * ENOTCONN: * Transport endpoint is not connected */ J1939_XTP_ABORT_BAD_SEQ = 7, /* Bad sequence number (and software is not able to recover) * * EILSEQ: * Illegal byte sequence */ J1939_XTP_ABORT_DUP_SEQ = 8, /* Duplicate sequence number (and software is not able to * recover) */ J1939_XTP_ABORT_EDPO_UNEXPECTED = 9, /* Unexpected EDPO packet (ETP) or Message size > 1785 bytes * (TP) */ J1939_XTP_ABORT_BAD_EDPO_PGN = 10, /* Unexpected EDPO PGN (PGN in EDPO is bad) */ J1939_XTP_ABORT_EDPO_OUTOF_CTS = 11, /* EDPO number of packets is greater than CTS */ J1939_XTP_ABORT_BAD_EDPO_OFFSET = 12, /* Bad EDPO offset */ J1939_XTP_ABORT_OTHER_DEPRECATED = 13, /* Deprecated. 
Use 250 instead (Any other reason) */ J1939_XTP_ABORT_ECTS_UNXPECTED_PGN = 14, /* Unexpected ECTS PGN (PGN in ECTS is bad) */ J1939_XTP_ABORT_ECTS_TOO_BIG = 15, /* ECTS requested packets exceeds message size */ J1939_XTP_ABORT_OTHER = 250, /* Any other reason (if a Connection Abort reason is * identified that is not listed in the table use code 250) */ }; static unsigned int j1939_tp_block = 255; static unsigned int j1939_tp_packet_delay; static unsigned int j1939_tp_padding = 1; /* helpers */ static const char *j1939_xtp_abort_to_str(enum j1939_xtp_abort abort) { switch (abort) { case J1939_XTP_ABORT_BUSY: return "Already in one or more connection managed sessions and cannot support another."; case J1939_XTP_ABORT_RESOURCE: return "System resources were needed for another task so this connection managed session was terminated."; case J1939_XTP_ABORT_TIMEOUT: return "A timeout occurred and this is the connection abort to close the session."; case J1939_XTP_ABORT_GENERIC: return "CTS messages received when data transfer is in progress"; case J1939_XTP_ABORT_FAULT: return "Maximal retransmit request limit reached"; case J1939_XTP_ABORT_UNEXPECTED_DATA: return "Unexpected data transfer packet"; case J1939_XTP_ABORT_BAD_SEQ: return "Bad sequence number (and software is not able to recover)"; case J1939_XTP_ABORT_DUP_SEQ: return "Duplicate sequence number (and software is not able to recover)"; case J1939_XTP_ABORT_EDPO_UNEXPECTED: return "Unexpected EDPO packet (ETP) or Message size > 1785 bytes (TP)"; case J1939_XTP_ABORT_BAD_EDPO_PGN: return "Unexpected EDPO PGN (PGN in EDPO is bad)"; case J1939_XTP_ABORT_EDPO_OUTOF_CTS: return "EDPO number of packets is greater than CTS"; case J1939_XTP_ABORT_BAD_EDPO_OFFSET: return "Bad EDPO offset"; case J1939_XTP_ABORT_OTHER_DEPRECATED: return "Deprecated. 
Use 250 instead (Any other reason)"; case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN: return "Unexpected ECTS PGN (PGN in ECTS is bad)"; case J1939_XTP_ABORT_ECTS_TOO_BIG: return "ECTS requested packets exceeds message size"; case J1939_XTP_ABORT_OTHER: return "Any other reason (if a Connection Abort reason is identified that is not listed in the table use code 250)"; default: return "<unknown>"; } } static int j1939_xtp_abort_to_errno(struct j1939_priv *priv, enum j1939_xtp_abort abort) { int err; switch (abort) { case J1939_XTP_NO_ABORT: WARN_ON_ONCE(abort == J1939_XTP_NO_ABORT); err = 0; break; case J1939_XTP_ABORT_BUSY: err = EALREADY; break; case J1939_XTP_ABORT_RESOURCE: err = EMSGSIZE; break; case J1939_XTP_ABORT_TIMEOUT: err = EHOSTUNREACH; break; case J1939_XTP_ABORT_GENERIC: err = EBADMSG; break; case J1939_XTP_ABORT_FAULT: err = ENOTRECOVERABLE; break; case J1939_XTP_ABORT_UNEXPECTED_DATA: err = ENOTCONN; break; case J1939_XTP_ABORT_BAD_SEQ: err = EILSEQ; break; case J1939_XTP_ABORT_DUP_SEQ: err = EPROTO; break; case J1939_XTP_ABORT_EDPO_UNEXPECTED: err = EPROTO; break; case J1939_XTP_ABORT_BAD_EDPO_PGN: err = EPROTO; break; case J1939_XTP_ABORT_EDPO_OUTOF_CTS: err = EPROTO; break; case J1939_XTP_ABORT_BAD_EDPO_OFFSET: err = EPROTO; break; case J1939_XTP_ABORT_OTHER_DEPRECATED: err = EPROTO; break; case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN: err = EPROTO; break; case J1939_XTP_ABORT_ECTS_TOO_BIG: err = EPROTO; break; case J1939_XTP_ABORT_OTHER: err = EPROTO; break; default: netdev_warn(priv->ndev, "Unknown abort code %i", abort); err = EPROTO; } return err; } static inline void j1939_session_list_lock(struct j1939_priv *priv) { spin_lock_bh(&priv->active_session_list_lock); } static inline void j1939_session_list_unlock(struct j1939_priv *priv) { spin_unlock_bh(&priv->active_session_list_lock); } void j1939_session_get(struct j1939_session *session) { kref_get(&session->kref); } /* session completion functions */ static void __j1939_session_drop(struct j1939_session *session) { if (!session->transmission) return; j1939_sock_pending_del(session->sk); sock_put(session->sk); } static void j1939_session_destroy(struct j1939_session *session) { struct sk_buff *skb; if (session->transmission) { if (session->err) j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ABORT); else j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ACK); } else if (session->err) { j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); } netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); WARN_ON_ONCE(!list_empty(&session->sk_session_queue_entry)); WARN_ON_ONCE(!list_empty(&session->active_session_list_entry)); while ((skb = skb_dequeue(&session->skb_queue)) != NULL) { /* drop ref taken in j1939_session_skb_queue() */ skb_unref(skb); kfree_skb(skb); } __j1939_session_drop(session); j1939_priv_put(session->priv); kfree(session); } static void __j1939_session_release(struct kref *kref) { struct j1939_session *session = container_of(kref, struct j1939_session, kref); j1939_session_destroy(session); } void j1939_session_put(struct j1939_session *session) { kref_put(&session->kref, __j1939_session_release); } static void j1939_session_txtimer_cancel(struct j1939_session *session) { if (hrtimer_cancel(&session->txtimer)) j1939_session_put(session); } static void j1939_session_rxtimer_cancel(struct j1939_session *session) { if (hrtimer_cancel(&session->rxtimer)) j1939_session_put(session); } void j1939_session_timers_cancel(struct j1939_session *session) { j1939_session_txtimer_cancel(session); 
j1939_session_rxtimer_cancel(session); } static inline bool j1939_cb_is_broadcast(const struct j1939_sk_buff_cb *skcb) { return (!skcb->addr.dst_name && (skcb->addr.da == 0xff)); } static void j1939_session_skb_drop_old(struct j1939_session *session) { struct sk_buff *do_skb; struct j1939_sk_buff_cb *do_skcb; unsigned int offset_start; unsigned long flags; if (skb_queue_len(&session->skb_queue) < 2) return; offset_start = session->pkt.tx_acked * 7; spin_lock_irqsave(&session->skb_queue.lock, flags); do_skb = skb_peek(&session->skb_queue); do_skcb = j1939_skb_to_cb(do_skb); if ((do_skcb->offset + do_skb->len) < offset_start) { __skb_unlink(do_skb, &session->skb_queue); /* drop ref taken in j1939_session_skb_queue() */ skb_unref(do_skb); spin_unlock_irqrestore(&session->skb_queue.lock, flags); kfree_skb(do_skb); } else { spin_unlock_irqrestore(&session->skb_queue.lock, flags); } } void j1939_session_skb_queue(struct j1939_session *session, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_priv *priv = session->priv; j1939_ac_fixup(priv, skb); if (j1939_address_is_unicast(skcb->addr.da) && priv->ents[skcb->addr.da].nusers) skcb->flags |= J1939_ECU_LOCAL_DST; skcb->flags |= J1939_ECU_LOCAL_SRC; skb_get(skb); skb_queue_tail(&session->skb_queue, skb); } static struct sk_buff *j1939_session_skb_get_by_offset(struct j1939_session *session, unsigned int offset_start) { struct j1939_priv *priv = session->priv; struct j1939_sk_buff_cb *do_skcb; struct sk_buff *skb = NULL; struct sk_buff *do_skb; unsigned long flags; spin_lock_irqsave(&session->skb_queue.lock, flags); skb_queue_walk(&session->skb_queue, do_skb) { do_skcb = j1939_skb_to_cb(do_skb); if ((offset_start >= do_skcb->offset && offset_start < (do_skcb->offset + do_skb->len)) || (offset_start == 0 && do_skcb->offset == 0 && do_skb->len == 0)) { skb = do_skb; } } if (skb) skb_get(skb); spin_unlock_irqrestore(&session->skb_queue.lock, flags); if (!skb) netdev_dbg(priv->ndev, "%s: 0x%p: no skb found for start: %i, queue size: %i\n", __func__, session, offset_start, skb_queue_len(&session->skb_queue)); return skb; } static struct sk_buff *j1939_session_skb_get(struct j1939_session *session) { unsigned int offset_start; offset_start = session->pkt.dpo * 7; return j1939_session_skb_get_by_offset(session, offset_start); } /* see if we are receiver * returns 0 for broadcasts, although we will receive them */ static inline int j1939_tp_im_receiver(const struct j1939_sk_buff_cb *skcb) { return skcb->flags & J1939_ECU_LOCAL_DST; } /* see if we are sender */ static inline int j1939_tp_im_transmitter(const struct j1939_sk_buff_cb *skcb) { return skcb->flags & J1939_ECU_LOCAL_SRC; } /* see if we are involved as either receiver or transmitter */ static int j1939_tp_im_involved(const struct j1939_sk_buff_cb *skcb, bool swap) { if (swap) return j1939_tp_im_receiver(skcb); else return j1939_tp_im_transmitter(skcb); } static int j1939_tp_im_involved_anydir(struct j1939_sk_buff_cb *skcb) { return skcb->flags & (J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST); } /* extract pgn from flow-ctl message */ static inline pgn_t j1939_xtp_ctl_to_pgn(const u8 *dat) { pgn_t pgn; pgn = (dat[7] << 16) | (dat[6] << 8) | (dat[5] << 0); if (j1939_pgn_is_pdu1(pgn)) pgn &= 0xffff00; return pgn; } static inline unsigned int j1939_tp_ctl_to_size(const u8 *dat) { return (dat[2] << 8) + (dat[1] << 0); } static inline unsigned int j1939_etp_ctl_to_packet(const u8 *dat) { return (dat[4] << 16) | (dat[3] << 8) | (dat[2] << 0); } static inline unsigned 
int j1939_etp_ctl_to_size(const u8 *dat) { return (dat[4] << 24) | (dat[3] << 16) | (dat[2] << 8) | (dat[1] << 0); } /* find existing session: * reverse: swap cb's src & dst * there is no problem with matching broadcasts, since * broadcasts (no dst, no da) would never call this * with reverse == true */ static bool j1939_session_match(struct j1939_addr *se_addr, struct j1939_addr *sk_addr, bool reverse) { if (se_addr->type != sk_addr->type) return false; if (reverse) { if (se_addr->src_name) { if (se_addr->src_name != sk_addr->dst_name) return false; } else if (se_addr->sa != sk_addr->da) { return false; } if (se_addr->dst_name) { if (se_addr->dst_name != sk_addr->src_name) return false; } else if (se_addr->da != sk_addr->sa) { return false; } } else { if (se_addr->src_name) { if (se_addr->src_name != sk_addr->src_name) return false; } else if (se_addr->sa != sk_addr->sa) { return false; } if (se_addr->dst_name) { if (se_addr->dst_name != sk_addr->dst_name) return false; } else if (se_addr->da != sk_addr->da) { return false; } } return true; } static struct j1939_session *j1939_session_get_by_addr_locked(struct j1939_priv *priv, struct list_head *root, struct j1939_addr *addr, bool reverse, bool transmitter) { struct j1939_session *session; lockdep_assert_held(&priv->active_session_list_lock); list_for_each_entry(session, root, active_session_list_entry) { j1939_session_get(session); if (j1939_session_match(&session->skcb.addr, addr, reverse) && session->transmission == transmitter) return session; j1939_session_put(session); } return NULL; } static struct j1939_session *j1939_session_get_simple(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_session *session; lockdep_assert_held(&priv->active_session_list_lock); list_for_each_entry(session, &priv->active_session_list, active_session_list_entry) { j1939_session_get(session); if (session->skcb.addr.type == J1939_SIMPLE && session->tskey == skcb->tskey && session->sk == skb->sk) return session; j1939_session_put(session); } return NULL; } static struct j1939_session *j1939_session_get_by_addr(struct j1939_priv *priv, struct j1939_addr *addr, bool reverse, bool transmitter) { struct j1939_session *session; j1939_session_list_lock(priv); session = j1939_session_get_by_addr_locked(priv, &priv->active_session_list, addr, reverse, transmitter); j1939_session_list_unlock(priv); return session; } static void j1939_skbcb_swap(struct j1939_sk_buff_cb *skcb) { u8 tmp = 0; swap(skcb->addr.dst_name, skcb->addr.src_name); swap(skcb->addr.da, skcb->addr.sa); /* swap SRC and DST flags, leave other untouched */ if (skcb->flags & J1939_ECU_LOCAL_SRC) tmp |= J1939_ECU_LOCAL_DST; if (skcb->flags & J1939_ECU_LOCAL_DST) tmp |= J1939_ECU_LOCAL_SRC; skcb->flags &= ~(J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST); skcb->flags |= tmp; } static struct sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv, const struct j1939_sk_buff_cb *re_skcb, bool ctl, bool swap_src_dst) { struct sk_buff *skb; struct j1939_sk_buff_cb *skcb; skb = alloc_skb(sizeof(struct can_frame) + sizeof(struct can_skb_priv), GFP_ATOMIC); if (unlikely(!skb)) return ERR_PTR(-ENOMEM); skb->dev = priv->ndev; can_skb_reserve(skb); can_skb_prv(skb)->ifindex = priv->ndev->ifindex; can_skb_prv(skb)->skbcnt = 0; /* reserve CAN header */ skb_reserve(skb, offsetof(struct can_frame, data)); /* skb->cb must be large enough to hold a j1939_sk_buff_cb structure */ BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*re_skcb)); memcpy(skb->cb, re_skcb, 
sizeof(*re_skcb)); skcb = j1939_skb_to_cb(skb); if (swap_src_dst) j1939_skbcb_swap(skcb); if (ctl) { if (skcb->addr.type == J1939_ETP) skcb->addr.pgn = J1939_ETP_PGN_CTL; else skcb->addr.pgn = J1939_TP_PGN_CTL; } else { if (skcb->addr.type == J1939_ETP) skcb->addr.pgn = J1939_ETP_PGN_DAT; else skcb->addr.pgn = J1939_TP_PGN_DAT; } return skb; } /* TP transmit packet functions */ static int j1939_tp_tx_dat(struct j1939_session *session, const u8 *dat, int len) { struct j1939_priv *priv = session->priv; struct sk_buff *skb; skb = j1939_tp_tx_dat_new(priv, &session->skcb, false, false); if (IS_ERR(skb)) return PTR_ERR(skb); skb_put_data(skb, dat, len); if (j1939_tp_padding && len < 8) memset(skb_put(skb, 8 - len), 0xff, 8 - len); return j1939_send_one(priv, skb); } static int j1939_xtp_do_tx_ctl(struct j1939_priv *priv, const struct j1939_sk_buff_cb *re_skcb, bool swap_src_dst, pgn_t pgn, const u8 *dat) { struct sk_buff *skb; u8 *skdat; if (!j1939_tp_im_involved(re_skcb, swap_src_dst)) return 0; skb = j1939_tp_tx_dat_new(priv, re_skcb, true, swap_src_dst); if (IS_ERR(skb)) return PTR_ERR(skb); skdat = skb_put(skb, 8); memcpy(skdat, dat, 5); skdat[5] = (pgn >> 0); skdat[6] = (pgn >> 8); skdat[7] = (pgn >> 16); return j1939_send_one(priv, skb); } static inline int j1939_tp_tx_ctl(struct j1939_session *session, bool swap_src_dst, const u8 *dat) { struct j1939_priv *priv = session->priv; return j1939_xtp_do_tx_ctl(priv, &session->skcb, swap_src_dst, session->skcb.addr.pgn, dat); } static int j1939_xtp_tx_abort(struct j1939_priv *priv, const struct j1939_sk_buff_cb *re_skcb, bool swap_src_dst, enum j1939_xtp_abort err, pgn_t pgn) { u8 dat[5]; if (!j1939_tp_im_involved(re_skcb, swap_src_dst)) return 0; memset(dat, 0xff, sizeof(dat)); dat[0] = J1939_TP_CMD_ABORT; dat[1] = err; return j1939_xtp_do_tx_ctl(priv, re_skcb, swap_src_dst, pgn, dat); } void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec) { j1939_session_get(session); hrtimer_start(&session->txtimer, ms_to_ktime(msec), HRTIMER_MODE_REL_SOFT); } static inline void j1939_tp_set_rxtimeout(struct j1939_session *session, int msec) { j1939_session_rxtimer_cancel(session); j1939_session_get(session); hrtimer_start(&session->rxtimer, ms_to_ktime(msec), HRTIMER_MODE_REL_SOFT); } static int j1939_session_tx_rts(struct j1939_session *session) { u8 dat[8]; int ret; memset(dat, 0xff, sizeof(dat)); dat[1] = (session->total_message_size >> 0); dat[2] = (session->total_message_size >> 8); dat[3] = session->pkt.total; if (session->skcb.addr.type == J1939_ETP) { dat[0] = J1939_ETP_CMD_RTS; dat[1] = (session->total_message_size >> 0); dat[2] = (session->total_message_size >> 8); dat[3] = (session->total_message_size >> 16); dat[4] = (session->total_message_size >> 24); } else if (j1939_cb_is_broadcast(&session->skcb)) { dat[0] = J1939_TP_CMD_BAM; /* fake cts for broadcast */ session->pkt.tx = 0; } else { dat[0] = J1939_TP_CMD_RTS; dat[4] = dat[3]; } if (dat[0] == session->last_txcmd) /* done already */ return 0; ret = j1939_tp_tx_ctl(session, false, dat); if (ret < 0) return ret; session->last_txcmd = dat[0]; if (dat[0] == J1939_TP_CMD_BAM) { j1939_tp_schedule_txtimer(session, 50); j1939_tp_set_rxtimeout(session, 250); } else { j1939_tp_set_rxtimeout(session, 1250); } netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); return 0; } static int j1939_session_tx_dpo(struct j1939_session *session) { unsigned int pkt; u8 dat[8]; int ret; memset(dat, 0xff, sizeof(dat)); dat[0] = J1939_ETP_CMD_DPO; session->pkt.dpo = 
session->pkt.tx_acked; pkt = session->pkt.dpo; dat[1] = session->pkt.last - session->pkt.tx_acked; dat[2] = (pkt >> 0); dat[3] = (pkt >> 8); dat[4] = (pkt >> 16); ret = j1939_tp_tx_ctl(session, false, dat); if (ret < 0) return ret; session->last_txcmd = dat[0]; j1939_tp_set_rxtimeout(session, 1250); session->pkt.tx = session->pkt.tx_acked; netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); return 0; } static int j1939_session_tx_dat(struct j1939_session *session) { struct j1939_priv *priv = session->priv; struct j1939_sk_buff_cb *se_skcb; int offset, pkt_done, pkt_end; unsigned int len, pdelay; struct sk_buff *se_skb; const u8 *tpdat; int ret = 0; u8 dat[8]; se_skb = j1939_session_skb_get_by_offset(session, session->pkt.tx * 7); if (!se_skb) return -ENOBUFS; se_skcb = j1939_skb_to_cb(se_skb); tpdat = se_skb->data; ret = 0; pkt_done = 0; if (session->skcb.addr.type != J1939_ETP && j1939_cb_is_broadcast(&session->skcb)) pkt_end = session->pkt.total; else pkt_end = session->pkt.last; while (session->pkt.tx < pkt_end) { dat[0] = session->pkt.tx - session->pkt.dpo + 1; offset = (session->pkt.tx * 7) - se_skcb->offset; len = se_skb->len - offset; if (len > 7) len = 7; if (offset + len > se_skb->len) { netdev_err_once(priv->ndev, "%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n", __func__, session, se_skcb->offset, se_skb->len , session->pkt.tx); ret = -EOVERFLOW; goto out_free; } if (!len) { ret = -ENOBUFS; break; } memcpy(&dat[1], &tpdat[offset], len); ret = j1939_tp_tx_dat(session, dat, len + 1); if (ret < 0) { /* ENOBUFS == CAN interface TX queue is full */ if (ret != -ENOBUFS) netdev_alert(priv->ndev, "%s: 0x%p: queue data error: %i\n", __func__, session, ret); break; } session->last_txcmd = 0xff; pkt_done++; session->pkt.tx++; pdelay = j1939_cb_is_broadcast(&session->skcb) ? 
50 : j1939_tp_packet_delay; if (session->pkt.tx < session->pkt.total && pdelay) { j1939_tp_schedule_txtimer(session, pdelay); break; } } if (pkt_done) j1939_tp_set_rxtimeout(session, 250); out_free: if (ret) kfree_skb(se_skb); else consume_skb(se_skb); return ret; } static int j1939_xtp_txnext_transmiter(struct j1939_session *session) { struct j1939_priv *priv = session->priv; int ret = 0; if (!j1939_tp_im_transmitter(&session->skcb)) { netdev_alert(priv->ndev, "%s: 0x%p: called by not transmitter!\n", __func__, session); return -EINVAL; } switch (session->last_cmd) { case 0: ret = j1939_session_tx_rts(session); break; case J1939_ETP_CMD_CTS: if (session->last_txcmd != J1939_ETP_CMD_DPO) { ret = j1939_session_tx_dpo(session); if (ret) return ret; } fallthrough; case J1939_TP_CMD_CTS: case 0xff: /* did some data */ case J1939_ETP_CMD_DPO: case J1939_TP_CMD_BAM: ret = j1939_session_tx_dat(session); break; default: netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n", __func__, session, session->last_cmd); } return ret; } static int j1939_session_tx_cts(struct j1939_session *session) { struct j1939_priv *priv = session->priv; unsigned int pkt, len; int ret; u8 dat[8]; if (!j1939_sk_recv_match(priv, &session->skcb)) return -ENOENT; len = session->pkt.total - session->pkt.rx; len = min3(len, session->pkt.block, j1939_tp_block ?: 255); memset(dat, 0xff, sizeof(dat)); if (session->skcb.addr.type == J1939_ETP) { pkt = session->pkt.rx + 1; dat[0] = J1939_ETP_CMD_CTS; dat[1] = len; dat[2] = (pkt >> 0); dat[3] = (pkt >> 8); dat[4] = (pkt >> 16); } else { dat[0] = J1939_TP_CMD_CTS; dat[1] = len; dat[2] = session->pkt.rx + 1; } if (dat[0] == session->last_txcmd) /* done already */ return 0; ret = j1939_tp_tx_ctl(session, true, dat); if (ret < 0) return ret; if (len) /* only mark cts done when len is set */ session->last_txcmd = dat[0]; j1939_tp_set_rxtimeout(session, 1250); netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); return 0; } static int j1939_session_tx_eoma(struct j1939_session *session) { struct j1939_priv *priv = session->priv; u8 dat[8]; int ret; if (!j1939_sk_recv_match(priv, &session->skcb)) return -ENOENT; memset(dat, 0xff, sizeof(dat)); if (session->skcb.addr.type == J1939_ETP) { dat[0] = J1939_ETP_CMD_EOMA; dat[1] = session->total_message_size >> 0; dat[2] = session->total_message_size >> 8; dat[3] = session->total_message_size >> 16; dat[4] = session->total_message_size >> 24; } else { dat[0] = J1939_TP_CMD_EOMA; dat[1] = session->total_message_size; dat[2] = session->total_message_size >> 8; dat[3] = session->pkt.total; } if (dat[0] == session->last_txcmd) /* done already */ return 0; ret = j1939_tp_tx_ctl(session, true, dat); if (ret < 0) return ret; session->last_txcmd = dat[0]; /* wait for the EOMA packet to come in */ j1939_tp_set_rxtimeout(session, 1250); netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); return 0; } static int j1939_xtp_txnext_receiver(struct j1939_session *session) { struct j1939_priv *priv = session->priv; int ret = 0; if (!j1939_tp_im_receiver(&session->skcb)) { netdev_alert(priv->ndev, "%s: 0x%p: called by not receiver!\n", __func__, session); return -EINVAL; } switch (session->last_cmd) { case J1939_TP_CMD_RTS: case J1939_ETP_CMD_RTS: ret = j1939_session_tx_cts(session); break; case J1939_ETP_CMD_CTS: case J1939_TP_CMD_CTS: case 0xff: /* did some data */ case J1939_ETP_CMD_DPO: if ((session->skcb.addr.type == J1939_TP && j1939_cb_is_broadcast(&session->skcb))) break; if (session->pkt.rx >= session->pkt.total) { ret 
= j1939_session_tx_eoma(session); } else if (session->pkt.rx >= session->pkt.last) { session->last_txcmd = 0; ret = j1939_session_tx_cts(session); } break; default: netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n", __func__, session, session->last_cmd); } return ret; } static int j1939_simple_txnext(struct j1939_session *session) { struct j1939_priv *priv = session->priv; struct sk_buff *se_skb = j1939_session_skb_get(session); struct sk_buff *skb; int ret; if (!se_skb) return 0; skb = skb_clone(se_skb, GFP_ATOMIC); if (!skb) { ret = -ENOMEM; goto out_free; } can_skb_set_owner(skb, se_skb->sk); j1939_tp_set_rxtimeout(session, J1939_SIMPLE_ECHO_TIMEOUT_MS); ret = j1939_send_one(priv, skb); if (ret) goto out_free; j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_SCHED); j1939_sk_queue_activate_next(session); out_free: if (ret) kfree_skb(se_skb); else consume_skb(se_skb); return ret; } static bool j1939_session_deactivate_locked(struct j1939_session *session) { bool active = false; lockdep_assert_held(&session->priv->active_session_list_lock); if (session->state >= J1939_SESSION_ACTIVE && session->state < J1939_SESSION_ACTIVE_MAX) { active = true; list_del_init(&session->active_session_list_entry); session->state = J1939_SESSION_DONE; j1939_session_put(session); } return active; } static bool j1939_session_deactivate(struct j1939_session *session) { struct j1939_priv *priv = session->priv; bool active; j1939_session_list_lock(priv); active = j1939_session_deactivate_locked(session); j1939_session_list_unlock(priv); return active; } static void j1939_session_deactivate_activate_next(struct j1939_session *session) { if (j1939_session_deactivate(session)) j1939_sk_queue_activate_next(session); } static void __j1939_session_cancel(struct j1939_session *session, enum j1939_xtp_abort err) { struct j1939_priv *priv = session->priv; WARN_ON_ONCE(!err); lockdep_assert_held(&session->priv->active_session_list_lock); session->err = j1939_xtp_abort_to_errno(priv, err); session->state = J1939_SESSION_WAITING_ABORT; /* do not send aborts on incoming broadcasts */ if (!j1939_cb_is_broadcast(&session->skcb)) { j1939_xtp_tx_abort(priv, &session->skcb, !session->transmission, err, session->skcb.addr.pgn); } if (session->sk) j1939_sk_send_loop_abort(session->sk, session->err); } static void j1939_session_cancel(struct j1939_session *session, enum j1939_xtp_abort err) { j1939_session_list_lock(session->priv); if (session->state >= J1939_SESSION_ACTIVE && session->state < J1939_SESSION_WAITING_ABORT) { j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); __j1939_session_cancel(session, err); } j1939_session_list_unlock(session->priv); if (!session->sk) j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); } static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer) { struct j1939_session *session = container_of(hrtimer, struct j1939_session, txtimer); struct j1939_priv *priv = session->priv; int ret = 0; if (session->skcb.addr.type == J1939_SIMPLE) { ret = j1939_simple_txnext(session); } else { if (session->transmission) ret = j1939_xtp_txnext_transmiter(session); else ret = j1939_xtp_txnext_receiver(session); } switch (ret) { case -ENOBUFS: /* Retry limit is currently arbitrary chosen */ if (session->tx_retry < J1939_XTP_TX_RETRY_LIMIT) { session->tx_retry++; j1939_tp_schedule_txtimer(session, 10 + get_random_u32_below(16)); } else { netdev_alert(priv->ndev, "%s: 0x%p: tx retry count reached\n", __func__, session); session->err = -ENETUNREACH; j1939_session_rxtimer_cancel(session); 
j1939_session_deactivate_activate_next(session); } break; case -ENETDOWN: /* In this case we should get a netdev_event(), all active * sessions will be cleared by j1939_cancel_active_session(). * So handle this as an error, but let * j1939_cancel_active_session() do the cleanup including * propagation of the error to user space. */ break; case -EOVERFLOW: j1939_session_cancel(session, J1939_XTP_ABORT_ECTS_TOO_BIG); break; case 0: session->tx_retry = 0; break; default: netdev_alert(priv->ndev, "%s: 0x%p: tx aborted with unknown reason: %i\n", __func__, session, ret); if (session->skcb.addr.type != J1939_SIMPLE) { j1939_session_cancel(session, J1939_XTP_ABORT_OTHER); } else { session->err = ret; j1939_session_rxtimer_cancel(session); j1939_session_deactivate_activate_next(session); } } j1939_session_put(session); return HRTIMER_NORESTART; } static void j1939_session_completed(struct j1939_session *session) { struct sk_buff *se_skb; if (!session->transmission) { se_skb = j1939_session_skb_get(session); /* distribute among j1939 receivers */ j1939_sk_recv(session->priv, se_skb); consume_skb(se_skb); } j1939_session_deactivate_activate_next(session); } static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer) { struct j1939_session *session = container_of(hrtimer, struct j1939_session, rxtimer); struct j1939_priv *priv = session->priv; if (session->state == J1939_SESSION_WAITING_ABORT) { netdev_alert(priv->ndev, "%s: 0x%p: abort rx timeout. Force session deactivation\n", __func__, session); j1939_session_deactivate_activate_next(session); } else if (session->skcb.addr.type == J1939_SIMPLE) { netdev_alert(priv->ndev, "%s: 0x%p: Timeout. Failed to send simple message.\n", __func__, session); /* The message is probably stuck in the CAN controller and can * be send as soon as CAN bus is in working state again. 
*/ session->err = -ETIME; j1939_session_deactivate(session); } else { j1939_session_list_lock(session->priv); if (session->state >= J1939_SESSION_ACTIVE && session->state < J1939_SESSION_ACTIVE_MAX) { netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n", __func__, session); j1939_session_get(session); hrtimer_start(&session->rxtimer, ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS), HRTIMER_MODE_REL_SOFT); __j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT); } j1939_session_list_unlock(session->priv); if (!session->sk) j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); } j1939_session_put(session); return HRTIMER_NORESTART; } static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session, const struct sk_buff *skb) { const struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); pgn_t pgn = j1939_xtp_ctl_to_pgn(skb->data); struct j1939_priv *priv = session->priv; enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT; u8 cmd = skb->data[0]; if (session->skcb.addr.pgn == pgn) return false; switch (cmd) { case J1939_TP_CMD_BAM: abort = J1939_XTP_NO_ABORT; break; case J1939_ETP_CMD_RTS: fallthrough; case J1939_TP_CMD_RTS: abort = J1939_XTP_ABORT_BUSY; break; case J1939_ETP_CMD_CTS: fallthrough; case J1939_TP_CMD_CTS: abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN; break; case J1939_ETP_CMD_DPO: abort = J1939_XTP_ABORT_BAD_EDPO_PGN; break; case J1939_ETP_CMD_EOMA: fallthrough; case J1939_TP_CMD_EOMA: abort = J1939_XTP_ABORT_OTHER; break; case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */ abort = J1939_XTP_NO_ABORT; break; default: WARN_ON_ONCE(1); break; } netdev_warn(priv->ndev, "%s: 0x%p: CMD 0x%02x with PGN 0x%05x for running session with different PGN 0x%05x.\n", __func__, session, cmd, pgn, session->skcb.addr.pgn); if (abort != J1939_XTP_NO_ABORT) j1939_xtp_tx_abort(priv, skcb, true, abort, pgn); return true; } static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb, bool reverse, bool transmitter) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_session *session; u8 abort = skb->data[1]; session = j1939_session_get_by_addr(priv, &skcb->addr, reverse, transmitter); if (!session) return; if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) goto abort_put; netdev_info(priv->ndev, "%s: 0x%p: 0x%05x: (%u) %s\n", __func__, session, j1939_xtp_ctl_to_pgn(skb->data), abort, j1939_xtp_abort_to_str(abort)); j1939_session_timers_cancel(session); session->err = j1939_xtp_abort_to_errno(priv, abort); if (session->sk) j1939_sk_send_loop_abort(session->sk, session->err); else j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); j1939_session_deactivate_activate_next(session); abort_put: j1939_session_put(session); } /* abort packets may come in 2 directions */ static void j1939_xtp_rx_abort(struct j1939_priv *priv, struct sk_buff *skb, bool transmitter) { j1939_xtp_rx_abort_one(priv, skb, false, transmitter); j1939_xtp_rx_abort_one(priv, skb, true, transmitter); } static void j1939_xtp_rx_eoma_one(struct j1939_session *session, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); const u8 *dat; int len; if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) return; dat = skb->data; if (skcb->addr.type == J1939_ETP) len = j1939_etp_ctl_to_size(dat); else len = j1939_tp_ctl_to_size(dat); if (session->total_message_size != len) { netdev_warn_once(session->priv->ndev, "%s: 0x%p: Incorrect size. 
Expected: %i; got: %i.\n", __func__, session, session->total_message_size, len); } netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); session->pkt.tx_acked = session->pkt.total; j1939_session_timers_cancel(session); /* transmitted without problems */ j1939_session_completed(session); } static void j1939_xtp_rx_eoma(struct j1939_priv *priv, struct sk_buff *skb, bool transmitter) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_session *session; session = j1939_session_get_by_addr(priv, &skcb->addr, true, transmitter); if (!session) return; j1939_xtp_rx_eoma_one(session, skb); j1939_session_put(session); } static void j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb) { enum j1939_xtp_abort err = J1939_XTP_ABORT_FAULT; unsigned int pkt; const u8 *dat; dat = skb->data; if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) return; netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); if (session->last_cmd == dat[0]) { err = J1939_XTP_ABORT_DUP_SEQ; goto out_session_cancel; } if (session->skcb.addr.type == J1939_ETP) pkt = j1939_etp_ctl_to_packet(dat); else pkt = dat[2]; if (!pkt) goto out_session_cancel; else if (dat[1] > session->pkt.block /* 0xff for etp */) goto out_session_cancel; /* set packet counters only when not CTS(0) */ session->pkt.tx_acked = pkt - 1; j1939_session_skb_drop_old(session); session->pkt.last = session->pkt.tx_acked + dat[1]; if (session->pkt.last > session->pkt.total) /* safety measure */ session->pkt.last = session->pkt.total; /* TODO: do not set tx here, do it in txtimer */ session->pkt.tx = session->pkt.tx_acked; session->last_cmd = dat[0]; if (dat[1]) { j1939_tp_set_rxtimeout(session, 1250); if (session->transmission) { if (session->pkt.tx_acked) j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_SCHED); j1939_session_txtimer_cancel(session); j1939_tp_schedule_txtimer(session, 0); } } else { /* CTS(0) */ j1939_tp_set_rxtimeout(session, 550); } return; out_session_cancel: j1939_session_timers_cancel(session); j1939_session_cancel(session, err); } static void j1939_xtp_rx_cts(struct j1939_priv *priv, struct sk_buff *skb, bool transmitter) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_session *session; session = j1939_session_get_by_addr(priv, &skcb->addr, true, transmitter); if (!session) return; j1939_xtp_rx_cts_one(session, skb); j1939_session_put(session); } static struct j1939_session *j1939_session_new(struct j1939_priv *priv, struct sk_buff *skb, size_t size) { struct j1939_session *session; struct j1939_sk_buff_cb *skcb; session = kzalloc(sizeof(*session), gfp_any()); if (!session) return NULL; INIT_LIST_HEAD(&session->active_session_list_entry); INIT_LIST_HEAD(&session->sk_session_queue_entry); kref_init(&session->kref); j1939_priv_get(priv); session->priv = priv; session->total_message_size = size; session->state = J1939_SESSION_NEW; skb_queue_head_init(&session->skb_queue); skb_queue_tail(&session->skb_queue, skb_get(skb)); skcb = j1939_skb_to_cb(skb); memcpy(&session->skcb, skcb, sizeof(session->skcb)); hrtimer_setup(&session->txtimer, j1939_tp_txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); hrtimer_setup(&session->rxtimer, j1939_tp_rxtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); netdev_dbg(priv->ndev, "%s: 0x%p: sa: %02x, da: %02x\n", __func__, session, skcb->addr.sa, skcb->addr.da); return session; } static struct j1939_session *j1939_session_fresh_new(struct j1939_priv *priv, int size, const struct j1939_sk_buff_cb *rel_skcb) { struct sk_buff *skb; struct j1939_sk_buff_cb 
*skcb; struct j1939_session *session; skb = alloc_skb(size + sizeof(struct can_skb_priv), GFP_ATOMIC); if (unlikely(!skb)) return NULL; skb->dev = priv->ndev; can_skb_reserve(skb); can_skb_prv(skb)->ifindex = priv->ndev->ifindex; can_skb_prv(skb)->skbcnt = 0; skcb = j1939_skb_to_cb(skb); memcpy(skcb, rel_skcb, sizeof(*skcb)); session = j1939_session_new(priv, skb, size); if (!session) { kfree_skb(skb); return NULL; } /* alloc data area */ skb_put(skb, size); /* skb is recounted in j1939_session_new() */ return session; } int j1939_session_activate(struct j1939_session *session) { struct j1939_priv *priv = session->priv; struct j1939_session *active = NULL; int ret = 0; j1939_session_list_lock(priv); if (session->skcb.addr.type != J1939_SIMPLE) active = j1939_session_get_by_addr_locked(priv, &priv->active_session_list, &session->skcb.addr, false, session->transmission); if (active) { j1939_session_put(active); ret = -EAGAIN; } else { WARN_ON_ONCE(session->state != J1939_SESSION_NEW); list_add_tail(&session->active_session_list_entry, &priv->active_session_list); j1939_session_get(session); session->state = J1939_SESSION_ACTIVE; netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); } j1939_session_list_unlock(priv); return ret; } static struct j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, struct sk_buff *skb) { enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT; struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb); struct j1939_session *session; const u8 *dat; int len, ret; pgn_t pgn; netdev_dbg(priv->ndev, "%s\n", __func__); dat = skb->data; pgn = j1939_xtp_ctl_to_pgn(dat); skcb.addr.pgn = pgn; if (!j1939_sk_recv_match(priv, &skcb)) return NULL; if (skcb.addr.type == J1939_ETP) { len = j1939_etp_ctl_to_size(dat); if (len > J1939_MAX_ETP_PACKET_SIZE) abort = J1939_XTP_ABORT_FAULT; else if (len > priv->tp_max_packet_size) abort = J1939_XTP_ABORT_RESOURCE; else if (len <= J1939_MAX_TP_PACKET_SIZE) abort = J1939_XTP_ABORT_FAULT; } else { len = j1939_tp_ctl_to_size(dat); if (len > J1939_MAX_TP_PACKET_SIZE) abort = J1939_XTP_ABORT_FAULT; else if (len > priv->tp_max_packet_size) abort = J1939_XTP_ABORT_RESOURCE; else if (len < J1939_MIN_TP_PACKET_SIZE) abort = J1939_XTP_ABORT_FAULT; } if (abort != J1939_XTP_NO_ABORT) { j1939_xtp_tx_abort(priv, &skcb, true, abort, pgn); return NULL; } session = j1939_session_fresh_new(priv, len, &skcb); if (!session) { j1939_xtp_tx_abort(priv, &skcb, true, J1939_XTP_ABORT_RESOURCE, pgn); return NULL; } /* initialize the control buffer: plain copy */ session->pkt.total = (len + 6) / 7; session->pkt.block = 0xff; if (skcb.addr.type != J1939_ETP) { if (dat[3] != session->pkt.total) netdev_alert(priv->ndev, "%s: 0x%p: strange total, %u != %u\n", __func__, session, session->pkt.total, dat[3]); session->pkt.total = dat[3]; session->pkt.block = min(dat[3], dat[4]); } session->pkt.rx = 0; session->pkt.tx = 0; session->tskey = priv->rx_tskey++; j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS); ret = j1939_session_activate(session); if (ret) { /* Entering this scope indicates an issue with the J1939 bus. * Possible scenarios include: * - A time lapse occurred, and a new session was initiated * due to another packet being sent correctly. This could * have been caused by too long interrupt, debugger, or being * out-scheduled by another task. * - The bus is receiving numerous erroneous packets, either * from a malfunctioning device or during a test scenario. 
*/ netdev_alert(priv->ndev, "%s: 0x%p: concurrent session with same addr (%02x %02x) is already active.\n", __func__, session, skcb.addr.sa, skcb.addr.da); j1939_session_put(session); return NULL; } return session; } static int j1939_xtp_rx_rts_session_active(struct j1939_session *session, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_priv *priv = session->priv; if (!session->transmission) { if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) return -EBUSY; /* RTS on active session */ j1939_session_timers_cancel(session); j1939_session_cancel(session, J1939_XTP_ABORT_BUSY); } if (session->last_cmd != 0) { /* we received a second rts on the same connection */ netdev_alert(priv->ndev, "%s: 0x%p: connection exists (%02x %02x). last cmd: %x\n", __func__, session, skcb->addr.sa, skcb->addr.da, session->last_cmd); j1939_session_timers_cancel(session); j1939_session_cancel(session, J1939_XTP_ABORT_BUSY); if (session->transmission) j1939_session_deactivate_activate_next(session); return -EBUSY; } if (session->skcb.addr.sa != skcb->addr.sa || session->skcb.addr.da != skcb->addr.da) netdev_warn(priv->ndev, "%s: 0x%p: session->skcb.addr.sa=0x%02x skcb->addr.sa=0x%02x session->skcb.addr.da=0x%02x skcb->addr.da=0x%02x\n", __func__, session, session->skcb.addr.sa, skcb->addr.sa, session->skcb.addr.da, skcb->addr.da); /* make sure 'sa' & 'da' are correct ! * They may be 'not filled in yet' for sending * skb's, since they did not pass the Address Claim ever. */ session->skcb.addr.sa = skcb->addr.sa; session->skcb.addr.da = skcb->addr.da; netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); return 0; } static void j1939_xtp_rx_rts(struct j1939_priv *priv, struct sk_buff *skb, bool transmitter) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_session *session; u8 cmd = skb->data[0]; session = j1939_session_get_by_addr(priv, &skcb->addr, false, transmitter); if (!session) { if (transmitter) { /* If we're the transmitter and this function is called, * we received our own RTS. A session has already been * created. * * For some reasons however it might have been destroyed * already. So don't create a new one here (using * "j1939_xtp_rx_rts_session_new()") as this will be a * receiver session. 
* * The reasons the session is already destroyed might * be: * - user space closed socket was and the session was * aborted * - session was aborted due to external abort message */ return; } session = j1939_xtp_rx_rts_session_new(priv, skb); if (!session) { if (cmd == J1939_TP_CMD_BAM && j1939_sk_recv_match(priv, skcb)) netdev_info(priv->ndev, "%s: failed to create TP BAM session\n", __func__); return; } } else { if (j1939_xtp_rx_rts_session_active(session, skb)) { j1939_session_put(session); return; } } session->last_cmd = cmd; if (cmd == J1939_TP_CMD_BAM) { if (!session->transmission) j1939_tp_set_rxtimeout(session, 750); } else { if (!session->transmission) { j1939_session_txtimer_cancel(session); j1939_tp_schedule_txtimer(session, 0); } j1939_tp_set_rxtimeout(session, 1250); } j1939_session_put(session); } static void j1939_xtp_rx_dpo_one(struct j1939_session *session, struct sk_buff *skb) { const u8 *dat = skb->data; if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) return; netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); /* transmitted without problems */ session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data); session->last_cmd = dat[0]; j1939_tp_set_rxtimeout(session, 750); if (!session->transmission) j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_DPO); } static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb, bool transmitter) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_session *session; session = j1939_session_get_by_addr(priv, &skcb->addr, false, transmitter); if (!session) { netdev_info(priv->ndev, "%s: no connection found\n", __func__); return; } j1939_xtp_rx_dpo_one(session, skb); j1939_session_put(session); } static void j1939_xtp_rx_dat_one(struct j1939_session *session, struct sk_buff *skb) { enum j1939_xtp_abort abort = J1939_XTP_ABORT_FAULT; struct j1939_priv *priv = session->priv; struct j1939_sk_buff_cb *skcb, *se_skcb; struct sk_buff *se_skb = NULL; const u8 *dat; u8 *tpdat; int offset; int nbytes; bool final = false; bool remain = false; bool do_cts_eoma = false; int packet; skcb = j1939_skb_to_cb(skb); dat = skb->data; if (skb->len != 8) { /* makes no sense */ abort = J1939_XTP_ABORT_UNEXPECTED_DATA; goto out_session_cancel; } switch (session->last_cmd) { case 0xff: break; case J1939_ETP_CMD_DPO: if (skcb->addr.type == J1939_ETP) break; fallthrough; case J1939_TP_CMD_BAM: fallthrough; case J1939_TP_CMD_CTS: if (skcb->addr.type != J1939_ETP) break; fallthrough; default: netdev_info(priv->ndev, "%s: 0x%p: last %02x\n", __func__, session, session->last_cmd); goto out_session_cancel; } packet = (dat[0] - 1 + session->pkt.dpo); if (packet > session->pkt.total || (session->pkt.rx + 1) > session->pkt.total) { netdev_info(priv->ndev, "%s: 0x%p: should have been completed\n", __func__, session); goto out_session_cancel; } se_skb = j1939_session_skb_get_by_offset(session, packet * 7); if (!se_skb) { netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__, session); goto out_session_cancel; } se_skcb = j1939_skb_to_cb(se_skb); offset = packet * 7 - se_skcb->offset; nbytes = se_skb->len - offset; if (nbytes > 7) nbytes = 7; if (nbytes <= 0 || (nbytes + 1) > skb->len) { netdev_info(priv->ndev, "%s: 0x%p: nbytes %i, len %i\n", __func__, session, nbytes, skb->len); goto out_session_cancel; } tpdat = se_skb->data; if (!session->transmission) { memcpy(&tpdat[offset], &dat[1], nbytes); } else { int err; err = memcmp(&tpdat[offset], &dat[1], nbytes); if (err) netdev_err_once(priv->ndev, "%s: 0x%p: Data of RX-looped back packet 
(%*ph) doesn't match TX data (%*ph)!\n", __func__, session, nbytes, &dat[1], nbytes, &tpdat[offset]); } if (packet == session->pkt.rx) session->pkt.rx++; if (se_skcb->addr.type != J1939_ETP && j1939_cb_is_broadcast(&session->skcb)) { if (session->pkt.rx >= session->pkt.total) final = true; else remain = true; } else { /* never final, an EOMA must follow */ if (session->pkt.rx >= session->pkt.last) do_cts_eoma = true; } if (final) { j1939_session_timers_cancel(session); j1939_session_completed(session); } else if (remain) { if (!session->transmission) j1939_tp_set_rxtimeout(session, 750); } else if (do_cts_eoma) { j1939_tp_set_rxtimeout(session, 1250); if (!session->transmission) j1939_tp_schedule_txtimer(session, 0); } else { j1939_tp_set_rxtimeout(session, 750); } session->last_cmd = 0xff; consume_skb(se_skb); j1939_session_put(session); return; out_session_cancel: kfree_skb(se_skb); j1939_session_timers_cancel(session); j1939_session_cancel(session, abort); j1939_session_put(session); } static void j1939_xtp_rx_dat(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb; struct j1939_session *session; skcb = j1939_skb_to_cb(skb); if (j1939_tp_im_transmitter(skcb)) { session = j1939_session_get_by_addr(priv, &skcb->addr, false, true); if (!session) netdev_info(priv->ndev, "%s: no tx connection found\n", __func__); else j1939_xtp_rx_dat_one(session, skb); } if (j1939_tp_im_receiver(skcb)) { session = j1939_session_get_by_addr(priv, &skcb->addr, false, false); if (!session) netdev_info(priv->ndev, "%s: no rx connection found\n", __func__); else j1939_xtp_rx_dat_one(session, skb); } if (j1939_cb_is_broadcast(skcb)) { session = j1939_session_get_by_addr(priv, &skcb->addr, false, false); if (session) j1939_xtp_rx_dat_one(session, skb); } } /* j1939 main intf */ struct j1939_session *j1939_tp_send(struct j1939_priv *priv, struct sk_buff *skb, size_t size) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_session *session; int ret; if (skcb->addr.pgn == J1939_TP_PGN_DAT || skcb->addr.pgn == J1939_TP_PGN_CTL || skcb->addr.pgn == J1939_ETP_PGN_DAT || skcb->addr.pgn == J1939_ETP_PGN_CTL) /* avoid conflict */ return ERR_PTR(-EDOM); if (size > priv->tp_max_packet_size) return ERR_PTR(-EMSGSIZE); if (size <= 8) skcb->addr.type = J1939_SIMPLE; else if (size > J1939_MAX_TP_PACKET_SIZE) skcb->addr.type = J1939_ETP; else skcb->addr.type = J1939_TP; if (skcb->addr.type == J1939_ETP && j1939_cb_is_broadcast(skcb)) return ERR_PTR(-EDESTADDRREQ); /* fill in addresses from names */ ret = j1939_ac_fixup(priv, skb); if (unlikely(ret)) return ERR_PTR(ret); /* fix DST flags, it may be used there soon */ if (j1939_address_is_unicast(skcb->addr.da) && priv->ents[skcb->addr.da].nusers) skcb->flags |= J1939_ECU_LOCAL_DST; /* src is always local, I'm sending ... */ skcb->flags |= J1939_ECU_LOCAL_SRC; /* prepare new session */ session = j1939_session_new(priv, skb, size); if (!session) return ERR_PTR(-ENOMEM); /* skb is recounted in j1939_session_new() */ sock_hold(skb->sk); session->sk = skb->sk; session->transmission = true; session->pkt.total = (size + 6) / 7; session->pkt.block = skcb->addr.type == J1939_ETP ? 
255 : min(j1939_tp_block ?: 255, session->pkt.total); if (j1939_cb_is_broadcast(&session->skcb)) /* set the end-packet for broadcast */ session->pkt.last = session->pkt.total; skcb->tskey = atomic_inc_return(&session->sk->sk_tskey) - 1; session->tskey = skcb->tskey; return session; } static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); int extd = J1939_TP; u8 cmd = skb->data[0]; switch (cmd) { case J1939_ETP_CMD_RTS: extd = J1939_ETP; fallthrough; case J1939_TP_CMD_BAM: if (cmd == J1939_TP_CMD_BAM && !j1939_cb_is_broadcast(skcb)) { netdev_err_once(priv->ndev, "%s: BAM to unicast (%02x), ignoring!\n", __func__, skcb->addr.sa); return; } fallthrough; case J1939_TP_CMD_RTS: if (skcb->addr.type != extd) return; if (cmd == J1939_TP_CMD_RTS && j1939_cb_is_broadcast(skcb)) { netdev_alert(priv->ndev, "%s: rts without destination (%02x)\n", __func__, skcb->addr.sa); return; } if (j1939_tp_im_transmitter(skcb)) j1939_xtp_rx_rts(priv, skb, true); if (j1939_tp_im_receiver(skcb) || j1939_cb_is_broadcast(skcb)) j1939_xtp_rx_rts(priv, skb, false); break; case J1939_ETP_CMD_CTS: extd = J1939_ETP; fallthrough; case J1939_TP_CMD_CTS: if (skcb->addr.type != extd) return; if (j1939_tp_im_transmitter(skcb)) j1939_xtp_rx_cts(priv, skb, false); if (j1939_tp_im_receiver(skcb)) j1939_xtp_rx_cts(priv, skb, true); break; case J1939_ETP_CMD_DPO: if (skcb->addr.type != J1939_ETP) return; if (j1939_tp_im_transmitter(skcb)) j1939_xtp_rx_dpo(priv, skb, true); if (j1939_tp_im_receiver(skcb)) j1939_xtp_rx_dpo(priv, skb, false); break; case J1939_ETP_CMD_EOMA: extd = J1939_ETP; fallthrough; case J1939_TP_CMD_EOMA: if (skcb->addr.type != extd) return; if (j1939_tp_im_transmitter(skcb)) j1939_xtp_rx_eoma(priv, skb, false); if (j1939_tp_im_receiver(skcb)) j1939_xtp_rx_eoma(priv, skb, true); break; case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */ if (j1939_cb_is_broadcast(skcb)) { netdev_err_once(priv->ndev, "%s: abort to broadcast (%02x), ignoring!\n", __func__, skcb->addr.sa); return; } if (j1939_tp_im_transmitter(skcb)) j1939_xtp_rx_abort(priv, skb, true); if (j1939_tp_im_receiver(skcb)) j1939_xtp_rx_abort(priv, skb, false); break; default: return; } } int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); if (!j1939_tp_im_involved_anydir(skcb) && !j1939_cb_is_broadcast(skcb)) return 0; switch (skcb->addr.pgn) { case J1939_ETP_PGN_DAT: skcb->addr.type = J1939_ETP; fallthrough; case J1939_TP_PGN_DAT: j1939_xtp_rx_dat(priv, skb); break; case J1939_ETP_PGN_CTL: skcb->addr.type = J1939_ETP; fallthrough; case J1939_TP_PGN_CTL: if (skb->len < 8) return 0; /* Don't care. 
Nothing to extract here */ j1939_tp_cmd_recv(priv, skb); break; default: return 0; /* no problem */ } return 1; /* "I processed the message" */ } void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_session *session; if (!skb->sk) return; if (skb->sk->sk_family != AF_CAN || skb->sk->sk_protocol != CAN_J1939) return; j1939_session_list_lock(priv); session = j1939_session_get_simple(priv, skb); j1939_session_list_unlock(priv); if (!session) { netdev_warn(priv->ndev, "%s: Received already invalidated message\n", __func__); return; } j1939_session_timers_cancel(session); j1939_session_deactivate(session); j1939_session_put(session); } int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk) { struct j1939_session *session, *saved; netdev_dbg(priv->ndev, "%s, sk: %p\n", __func__, sk); j1939_session_list_lock(priv); list_for_each_entry_safe(session, saved, &priv->active_session_list, active_session_list_entry) { if (!sk || sk == session->sk) { if (hrtimer_try_to_cancel(&session->txtimer) == 1) j1939_session_put(session); if (hrtimer_try_to_cancel(&session->rxtimer) == 1) j1939_session_put(session); session->err = ESHUTDOWN; j1939_session_deactivate_locked(session); } } j1939_session_list_unlock(priv); return NOTIFY_DONE; } void j1939_tp_init(struct j1939_priv *priv) { spin_lock_init(&priv->active_session_list_lock); INIT_LIST_HEAD(&priv->active_session_list); priv->tp_max_packet_size = J1939_MAX_ETP_PACKET_SIZE; } |
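/*
 * The transport machinery above is driven entirely from the CAN_J1939
 * socket API: any send of more than a single CAN frame's worth of data
 * ends up in j1939_tp_send() and is segmented as a TP (or ETP) session.
 * A minimal user-space sketch follows; the interface name, addresses and
 * PGN are illustrative assumptions, not values taken from this file, and
 * error-path cleanup is trimmed for brevity.
 */
#include <net/if.h>
#include <sys/socket.h>
#include <linux/can.h>
#include <linux/can/j1939.h>

int j1939_send_example(void)
{
	static unsigned char payload[100];	/* > 8 bytes, so a TP session is used */
	struct sockaddr_can baddr = {
		.can_family = AF_CAN,
		.can_ifindex = if_nametoindex("can0"),	/* assumed interface */
		.can_addr.j1939 = {
			.name = J1939_NO_NAME,
			.addr = 0x20,		/* illustrative source address */
			.pgn = J1939_NO_PGN,
		},
	};
	struct sockaddr_can daddr = {
		.can_family = AF_CAN,
		.can_addr.j1939 = {
			.name = J1939_NO_NAME,
			.addr = 0x30,		/* illustrative destination address */
			.pgn = 0x12300,		/* illustrative data PGN */
		},
	};
	int sock = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);

	if (sock < 0)
		return -1;
	if (bind(sock, (struct sockaddr *)&baddr, sizeof(baddr)) < 0)
		return -1;
	/* the kernel opens the RTS/CTS (or BAM) session and paces the data frames */
	return sendto(sock, payload, sizeof(payload), 0,
		      (struct sockaddr *)&daddr, sizeof(daddr));
}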
// SPDX-License-Identifier: GPL-2.0+ /* * Originally from efivars.c * * Copyright (C) 2001,2003,2004 Dell <Matt_Domsch@dell.com> * Copyright (C) 2004 Intel Corporation <matthew.e.tolentino@intel.com> */ #define pr_fmt(fmt) "efivars: " fmt #include <linux/types.h> #include <linux/sizes.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/module.h> #include <linux/string.h> #include <linux/smp.h> #include <linux/efi.h> #include <linux/ucs2_string.h> /* Private pointer to registered efivars */ static struct efivars *__efivars; static DEFINE_SEMAPHORE(efivars_lock, 1); static efi_status_t check_var_size(bool nonblocking, u32 attributes, unsigned long size) { const struct efivar_operations *fops; efi_status_t status; fops = __efivars->ops; if (!fops->query_variable_store) status = EFI_UNSUPPORTED; else status = fops->query_variable_store(attributes, size, nonblocking); if (status == EFI_UNSUPPORTED) return (size <= SZ_64K) ? EFI_SUCCESS : EFI_OUT_OF_RESOURCES; return status; } /** * efivar_is_available - check if efivars is available * * @return true iff efivars is currently registered */ bool efivar_is_available(void) { return __efivars != NULL; } EXPORT_SYMBOL_GPL(efivar_is_available); /** * efivars_register - register an efivars * @efivars: efivars to register * @ops: efivars operations * * Only a single efivars can be registered at any time. */ int efivars_register(struct efivars *efivars, const struct efivar_operations *ops) { int rv; int event; if (down_interruptible(&efivars_lock)) return -EINTR; if (__efivars) { pr_warn("efivars already registered\n"); rv = -EBUSY; goto out; } efivars->ops = ops; __efivars = efivars; if (efivar_supports_writes()) event = EFIVAR_OPS_RDWR; else event = EFIVAR_OPS_RDONLY; blocking_notifier_call_chain(&efivar_ops_nh, event, NULL); pr_info("Registered efivars operations\n"); rv = 0; out: up(&efivars_lock); return rv; } EXPORT_SYMBOL_GPL(efivars_register); /** * efivars_unregister - unregister an efivars * @efivars: efivars to unregister * * The caller must have already removed every entry from the list, * failure to do so is an error.
*/ int efivars_unregister(struct efivars *efivars) { int rv; if (down_interruptible(&efivars_lock)) return -EINTR; if (!__efivars) { pr_err("efivars not registered\n"); rv = -EINVAL; goto out; } if (__efivars != efivars) { rv = -EINVAL; goto out; } pr_info("Unregistered efivars operations\n"); __efivars = NULL; rv = 0; out: up(&efivars_lock); return rv; } EXPORT_SYMBOL_GPL(efivars_unregister); bool efivar_supports_writes(void) { return __efivars && __efivars->ops->set_variable; } EXPORT_SYMBOL_GPL(efivar_supports_writes); /* * efivar_lock() - obtain the efivar lock, wait for it if needed * @return 0 on success, error code on failure */ int efivar_lock(void) { if (down_interruptible(&efivars_lock)) return -EINTR; if (!__efivars->ops) { up(&efivars_lock); return -ENODEV; } return 0; } EXPORT_SYMBOL_NS_GPL(efivar_lock, "EFIVAR"); /* * efivar_lock() - obtain the efivar lock if it is free * @return 0 on success, error code on failure */ int efivar_trylock(void) { if (down_trylock(&efivars_lock)) return -EBUSY; if (!__efivars->ops) { up(&efivars_lock); return -ENODEV; } return 0; } EXPORT_SYMBOL_NS_GPL(efivar_trylock, "EFIVAR"); /* * efivar_unlock() - release the efivar lock */ void efivar_unlock(void) { up(&efivars_lock); } EXPORT_SYMBOL_NS_GPL(efivar_unlock, "EFIVAR"); /* * efivar_get_variable() - retrieve a variable identified by name/vendor * * Must be called with efivars_lock held. */ efi_status_t efivar_get_variable(efi_char16_t *name, efi_guid_t *vendor, u32 *attr, unsigned long *size, void *data) { return __efivars->ops->get_variable(name, vendor, attr, size, data); } EXPORT_SYMBOL_NS_GPL(efivar_get_variable, "EFIVAR"); /* * efivar_get_next_variable() - enumerate the next name/vendor pair * * Must be called with efivars_lock held. */ efi_status_t efivar_get_next_variable(unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) { return __efivars->ops->get_next_variable(name_size, name, vendor); } EXPORT_SYMBOL_NS_GPL(efivar_get_next_variable, "EFIVAR"); /* * efivar_set_variable_locked() - set a variable identified by name/vendor * * Must be called with efivars_lock held. If @nonblocking is set, it will use * non-blocking primitives so it is guaranteed not to sleep. */ efi_status_t efivar_set_variable_locked(efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data, bool nonblocking) { efi_set_variable_t *setvar; efi_status_t status; if (data_size > 0) { status = check_var_size(nonblocking, attr, data_size + ucs2_strsize(name, EFI_VAR_NAME_LEN)); if (status != EFI_SUCCESS) return status; } /* * If no _nonblocking variant exists, the ordinary one * is assumed to be non-blocking. */ setvar = __efivars->ops->set_variable_nonblocking; if (!setvar || !nonblocking) setvar = __efivars->ops->set_variable; return setvar(name, vendor, attr, data_size, data); } EXPORT_SYMBOL_NS_GPL(efivar_set_variable_locked, "EFIVAR"); /* * efivar_set_variable() - set a variable identified by name/vendor * * Can be called without holding the efivars_lock. Will sleep on obtaining the * lock, or on obtaining other locks that are needed in order to complete the * call. 
*/ efi_status_t efivar_set_variable(efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data) { efi_status_t status; if (efivar_lock()) return EFI_ABORTED; status = efivar_set_variable_locked(name, vendor, attr, data_size, data, false); efivar_unlock(); return status; } EXPORT_SYMBOL_NS_GPL(efivar_set_variable, "EFIVAR"); efi_status_t efivar_query_variable_info(u32 attr, u64 *storage_space, u64 *remaining_space, u64 *max_variable_size) { if (!__efivars->ops->query_variable_info) return EFI_UNSUPPORTED; return __efivars->ops->query_variable_info(attr, storage_space, remaining_space, max_variable_size); } EXPORT_SYMBOL_NS_GPL(efivar_query_variable_info, "EFIVAR"); |
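/*
 * The getter side follows the same locking pattern as efivar_set_variable()
 * above: take efivars_lock, call into the firmware, release the lock.  A
 * minimal in-kernel sketch; the variable name and vendor GUID are
 * illustrative, only the efivar_lock()/efivar_get_variable()/efivar_unlock()
 * calls come from this file.
 */
static efi_status_t example_read_boot_current(u16 *out)
{
	efi_char16_t name[] = L"BootCurrent";		/* a 16-bit UEFI variable */
	efi_guid_t vendor = EFI_GLOBAL_VARIABLE_GUID;
	unsigned long size = sizeof(*out);
	u32 attr;
	efi_status_t status;

	if (efivar_lock())
		return EFI_ABORTED;
	status = efivar_get_variable(name, &vendor, &attr, &size, out);
	efivar_unlock();
	return status;
}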
// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * PF_INET protocol family socket handler. * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Florian La Roche, <flla@stud.uni-sb.de> * Alan Cox, <A.Cox@swansea.ac.uk> * * Changes (see also sock.c) * * piggy, * Karl Knutson : Socket protocol table * A.N.Kuznetsov : Socket death error in accept(). * John Richardson : Fix non blocking error in connect() * so sockets that fail to connect * don't return -EINPROGRESS.
* Alan Cox : Asynchronous I/O support * Alan Cox : Keep correct socket pointer on sock * structures * when accept() ed * Alan Cox : Semantics of SO_LINGER aren't state * moved to close when you look carefully. * With this fixed and the accept bug fixed * some RPC stuff seems happier. * Niibe Yutaka : 4.4BSD style write async I/O * Alan Cox, * Tony Gale : Fixed reuse semantics. * Alan Cox : bind() shouldn't abort existing but dead * sockets. Stops FTP netin:.. I hope. * Alan Cox : bind() works correctly for RAW sockets. * Note that FreeBSD at least was broken * in this respect so be careful with * compatibility tests... * Alan Cox : routing cache support * Alan Cox : memzero the socket structure for * compactness. * Matt Day : nonblock connect error handler * Alan Cox : Allow large numbers of pending sockets * (eg for big web sites), but only if * specifically application requested. * Alan Cox : New buffering throughout IP. Used * dumbly. * Alan Cox : New buffering now used smartly. * Alan Cox : BSD rather than common sense * interpretation of listen. * Germano Caronni : Assorted small races. * Alan Cox : sendmsg/recvmsg basic support. * Alan Cox : Only sendmsg/recvmsg now supported. * Alan Cox : Locked down bind (see security list). * Alan Cox : Loosened bind a little. * Mike McLagan : ADD/DEL DLCI Ioctls * Willy Konynenberg : Transparent proxying support. * David S. Miller : New socket lookup architecture. * Some other random speedups. * Cyrus Durgin : Cleaned up file for kmod hacks. * Andi Kleen : Fix inet_stream_connect TCP race. */ #define pr_fmt(fmt) "IPv4: " fmt #include <linux/err.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/kmod.h> #include <linux/sched.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/capability.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/stat.h> #include <linux/init.h> #include <linux/poll.h> #include <linux/netfilter_ipv4.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/inet.h> #include <linux/igmp.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <net/checksum.h> #include <net/ip.h> #include <net/protocol.h> #include <net/arp.h> #include <net/route.h> #include <net/ip_fib.h> #include <net/inet_connection_sock.h> #include <net/gro.h> #include <net/gso.h> #include <net/tcp.h> #include <net/udp.h> #include <net/udplite.h> #include <net/ping.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/raw.h> #include <net/icmp.h> #include <net/inet_common.h> #include <net/ip_tunnels.h> #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/secure_seq.h> #ifdef CONFIG_IP_MROUTE #include <linux/mroute.h> #endif #include <net/l3mdev.h> #include <net/compat.h> #include <net/rps.h> #include <trace/events/sock.h> /* The inetsw table contains everything that inet_create needs to * build a new socket. 
*/ static struct list_head inetsw[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw_lock); /* New destruction routine */ void inet_sock_destruct(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_error_queue); sk_mem_reclaim_final(sk); if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { pr_err("Attempt to release TCP socket in state %d %p\n", sk->sk_state, sk); return; } if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Attempt to release alive inet socket %p\n", sk); return; } WARN_ON_ONCE(atomic_read(&sk->sk_rmem_alloc)); WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); WARN_ON_ONCE(sk->sk_wmem_queued); WARN_ON_ONCE(sk->sk_forward_alloc); kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1)); } EXPORT_SYMBOL(inet_sock_destruct); /* * The routines beyond this point handle the behaviour of an AF_INET * socket object. Mostly it punts to the subprotocols of IP to do * the work. */ /* * Automatically bind an unbound socket. */ static int inet_autobind(struct sock *sk) { struct inet_sock *inet; /* We may need to bind the socket. */ lock_sock(sk); inet = inet_sk(sk); if (!inet->inet_num) { if (sk->sk_prot->get_port(sk, 0)) { release_sock(sk); return -EAGAIN; } inet->inet_sport = htons(inet->inet_num); } release_sock(sk); return 0; } int __inet_listen_sk(struct sock *sk, int backlog) { unsigned char old_state = sk->sk_state; int err, tcp_fastopen; if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN))) return -EINVAL; WRITE_ONCE(sk->sk_max_ack_backlog, backlog); /* Really, if the socket is already in listen state * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { /* Enable TFO w/o requiring TCP_FASTOPEN socket option. * Note that only TCP sockets (SOCK_STREAM) will reach here. * Also fastopen backlog may already been set via the option * because the socket was in TCP_LISTEN state previously but * was shutdown() rather than close(). */ tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { fastopen_queue_tune(sk, backlog); tcp_fastopen_init_key_once(sock_net(sk)); } err = inet_csk_listen_start(sk); if (err) return err; tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL); } return 0; } /* * Move a socket into listening state. */ int inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; int err = -EINVAL; lock_sock(sk); if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) goto out; err = __inet_listen_sk(sk, backlog); out: release_sock(sk); return err; } EXPORT_SYMBOL(inet_listen); /* * Create an inet socket. */ static int inet_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; struct inet_protosw *answer; struct inet_sock *inet; struct proto *answer_prot; unsigned char answer_flags; int try_loading_module = 0; int err; if (protocol < 0 || protocol >= IPPROTO_MAX) return -EINVAL; sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); list_for_each_entry_rcu(answer, &inetsw[sock->type], list) { err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) break; } else { /* Check for the two wild cases. 
*/ if (IPPROTO_IP == protocol) { protocol = answer->protocol; break; } if (IPPROTO_IP == answer->protocol) break; } err = -EPROTONOSUPPORT; } if (unlikely(err)) { if (try_loading_module < 2) { rcu_read_unlock(); /* * Be more specific, e.g. net-pf-2-proto-132-type-1 * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM) */ if (++try_loading_module == 1) request_module("net-pf-%d-proto-%d-type-%d", PF_INET, protocol, sock->type); /* * Fall back to generic, e.g. net-pf-2-proto-132 * (net-pf-PF_INET-proto-IPPROTO_SCTP) */ else request_module("net-pf-%d-proto-%d", PF_INET, protocol); goto lookup_protocol; } else goto out_rcu_unlock; } err = -EPERM; if (sock->type == SOCK_RAW && !kern && !ns_capable(net->user_ns, CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; answer_prot = answer->prot; answer_flags = answer->flags; rcu_read_unlock(); WARN_ON(!answer_prot->slab); err = -ENOMEM; sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; err = 0; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; if (INET_PROTOSW_ICSK & answer_flags) inet_init_csk_locks(sk); inet = inet_sk(sk); inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags); inet_clear_bit(NODEFRAG, sk); if (SOCK_RAW == sock->type) { inet->inet_num = protocol; if (IPPROTO_RAW == protocol) inet_set_bit(HDRINCL, sk); } if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; atomic_set(&inet->inet_id, 0); sock_init_data(sock, sk); sk->sk_destruct = inet_sock_destruct; sk->sk_protocol = protocol; sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash); inet->uc_ttl = -1; inet_set_bit(MC_LOOP, sk); inet->mc_ttl = 1; inet_set_bit(MC_ALL, sk); inet->mc_index = 0; inet->mc_list = NULL; inet->rcv_tos = 0; if (inet->inet_num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically * shares. */ inet->inet_sport = htons(inet->inet_num); /* Add to protocol hash chains. */ err = sk->sk_prot->hash(sk); if (err) goto out_sk_release; } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); if (err) goto out_sk_release; } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); if (err) goto out_sk_release; } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; out_sk_release: sk_common_release(sk); sock->sk = NULL; goto out; } /* * The peer socket should always be NULL (or else). When we call this * function we are destroying the object and from then on nobody * should refer to it. */ int inet_release(struct socket *sock) { struct sock *sk = sock->sk; if (sk) { long timeout; if (!sk->sk_kern_sock) BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk); /* Applications forget to leave groups before exiting */ ip_mc_drop_socket(sk); /* If linger is set, we don't return until the close * is complete. Otherwise we return immediately. The * actually closing is done the same either way. * * If the close is due to the process exiting, we never * linger.. */ timeout = 0; if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING)) timeout = sk->sk_lingertime; sk->sk_prot->close(sk, timeout); sock->sk = NULL; } return 0; } EXPORT_SYMBOL(inet_release); int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) { u32 flags = BIND_WITH_LOCK; int err; /* If the socket has its own bind function then use it. 
(RAW) */ if (sk->sk_prot->bind) { return sk->sk_prot->bind(sk, uaddr, addr_len); } if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET4_BIND, &flags); if (err) return err; return __inet_bind(sk, uaddr, addr_len, flags); } int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { return inet_bind_sk(sock->sk, uaddr, addr_len); } EXPORT_SYMBOL(inet_bind); int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, u32 flags) { struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); unsigned short snum; int chk_addr_ret; u32 tb_id = RT_TABLE_LOCAL; int err; if (addr->sin_family != AF_INET) { /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET) * only if s_addr is INADDR_ANY. */ err = -EAFNOSUPPORT; if (addr->sin_family != AF_UNSPEC || addr->sin_addr.s_addr != htonl(INADDR_ANY)) goto out; } tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id; chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); /* Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since * allowing applications to make a non-local bind solves * several problems with systems using dynamic addressing. * (ie. your servers still start up even if your ISDN link * is temporarily down) */ err = -EADDRNOTAVAIL; if (!inet_addr_valid_or_nonlocal(net, inet, addr->sin_addr.s_addr, chk_addr_ret)) goto out; snum = ntohs(addr->sin_port); err = -EACCES; if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) && snum && inet_port_requires_bind_service(net, snum) && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) goto out; /* We keep a pair of addresses. rcv_saddr is the one * used by hash lookups, and saddr is used for transmit. * * In the BSD API these are the same except where it * would be illegal to use them (multicast/broadcast) in * which case the sending device address is used. */ if (flags & BIND_WITH_LOCK) lock_sock(sk); /* Check these errors (active socket, double bind). */ err = -EINVAL; if (sk->sk_state != TCP_CLOSE || inet->inet_num) goto out_release_sock; inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->inet_saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. 
*/ if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) || (flags & BIND_FORCE_ADDRESS_NO_PORT))) { err = sk->sk_prot->get_port(sk, snum); if (err) { inet->inet_saddr = inet->inet_rcv_saddr = 0; goto out_release_sock; } if (!(flags & BIND_FROM_BPF)) { err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk); if (err) { inet->inet_saddr = inet->inet_rcv_saddr = 0; if (sk->sk_prot->put_port) sk->sk_prot->put_port(sk); goto out_release_sock; } } } if (inet->inet_rcv_saddr) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; inet->inet_sport = htons(inet->inet_num); inet->inet_daddr = 0; inet->inet_dport = 0; sk_dst_reset(sk); err = 0; out_release_sock: if (flags & BIND_WITH_LOCK) release_sock(sk); out: return err; } int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; const struct proto *prot; int err; if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); if (uaddr->sa_family == AF_UNSPEC) return prot->disconnect(sk, flags); if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { err = prot->pre_connect(sk, uaddr, addr_len); if (err) return err; } if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; return prot->connect(sk, uaddr, addr_len); } EXPORT_SYMBOL(inet_dgram_connect); static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) { DEFINE_WAIT_FUNC(wait, woken_wake_function); add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending += writebias; /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. * Connect() does not allow to get error notifications * without closing the socket. */ while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { release_sock(sk); timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); if (signal_pending(current) || !timeo) break; } remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending -= writebias; return timeo; } /* * Connect to a remote host. There is regrettably still a little * TCP 'magic' in here. */ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags, int is_sendmsg) { struct sock *sk = sock->sk; int err; long timeo; /* * uaddr can be NULL and addr_len can be 0 if: * sk is a TCP fastopen active socket and * TCP_FASTOPEN_CONNECT sockopt is set and * we already have a valid cookie for this socket. * In this case, user can call write() after connect(). * write() will invoke tcp_sendmsg_fastopen() which calls * __inet_stream_connect(). */ if (uaddr) { if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; if (uaddr->sa_family == AF_UNSPEC) { sk->sk_disconnects++; err = sk->sk_prot->disconnect(sk, flags); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; goto out; } } switch (sock->state) { default: err = -EINVAL; goto out; case SS_CONNECTED: err = -EISCONN; goto out; case SS_CONNECTING: if (inet_test_bit(DEFER_CONNECT, sk)) err = is_sendmsg ? 
-EINPROGRESS : -EISCONN; else err = -EALREADY; /* Fall out of switch with err, set for this state */ break; case SS_UNCONNECTED: err = -EISCONN; if (sk->sk_state != TCP_CLOSE) goto out; if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { err = sk->sk_prot->pre_connect(sk, uaddr, addr_len); if (err) goto out; } err = sk->sk_prot->connect(sk, uaddr, addr_len); if (err < 0) goto out; sock->state = SS_CONNECTING; if (!err && inet_test_bit(DEFER_CONNECT, sk)) goto out; /* Just entered SS_CONNECTING state; the only * difference is that return value in non-blocking * case is EINPROGRESS, rather than EALREADY. */ err = -EINPROGRESS; break; } timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { int writebias = (sk->sk_protocol == IPPROTO_TCP) && tcp_sk(sk)->fastopen_req && tcp_sk(sk)->fastopen_req->data ? 1 : 0; int dis = sk->sk_disconnects; /* Error code is set above */ if (!timeo || !inet_wait_for_connect(sk, timeo, writebias)) goto out; err = sock_intr_errno(timeo); if (signal_pending(current)) goto out; if (dis != sk->sk_disconnects) { err = -EPIPE; goto out; } } /* Connection was closed by RST, timeout, ICMP error * or another process disconnected us. */ if (sk->sk_state == TCP_CLOSE) goto sock_error; /* sk->sk_err may be not zero now, if RECVERR was ordered by user * and error was received after socket entered established state. * Hence, it is handled normally after connect() return successfully. */ sock->state = SS_CONNECTED; err = 0; out: return err; sock_error: err = sock_error(sk) ? : -ECONNABORTED; sock->state = SS_UNCONNECTED; sk->sk_disconnects++; if (sk->sk_prot->disconnect(sk, flags)) sock->state = SS_DISCONNECTING; goto out; } EXPORT_SYMBOL(__inet_stream_connect); int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { int err; lock_sock(sock->sk); err = __inet_stream_connect(sock, uaddr, addr_len, flags, 0); release_sock(sock->sk); return err; } EXPORT_SYMBOL(inet_stream_connect); void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk) { sock_rps_record_flow(newsk); WARN_ON(!((1 << newsk->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_RECV | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_CLOSE))); if (test_bit(SOCK_SUPPORT_ZC, &sock->flags)) set_bit(SOCK_SUPPORT_ZC, &newsock->flags); sock_graft(newsk, newsock); newsock->state = SS_CONNECTED; } /* * Accept a pending connection. The TCP layer now gives BSD semantics. */ int inet_accept(struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg) { struct sock *sk1 = sock->sk, *sk2; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ arg->err = -EINVAL; sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, arg); if (!sk2) return arg->err; lock_sock(sk2); __inet_accept(sock, newsock, sk2); release_sock(sk2); return 0; } EXPORT_SYMBOL(inet_accept); /* * This does both peername and sockname. 
*/ int inet_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr); int sin_addr_len = sizeof(*sin); sin->sin_family = AF_INET; lock_sock(sk); if (peer) { if (!inet->inet_dport || (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && peer == 1)) { release_sock(sk); return -ENOTCONN; } sin->sin_port = inet->inet_dport; sin->sin_addr.s_addr = inet->inet_daddr; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETPEERNAME); } else { __be32 addr = inet->inet_rcv_saddr; if (!addr) addr = inet->inet_saddr; sin->sin_port = inet->inet_sport; sin->sin_addr.s_addr = addr; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETSOCKNAME); } release_sock(sk); memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); return sin_addr_len; } EXPORT_SYMBOL(inet_getname); int inet_send_prepare(struct sock *sk) { sock_rps_record_flow(sk); /* We may need to bind the socket. */ if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind && inet_autobind(sk)) return -EAGAIN; return 0; } EXPORT_SYMBOL_GPL(inet_send_prepare); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udp_sendmsg, sk, msg, size); } EXPORT_SYMBOL(inet_sendmsg); void inet_splice_eof(struct socket *sock) { const struct proto *prot; struct sock *sk = sock->sk; if (unlikely(inet_send_prepare(sk))) return; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); if (prot->splice_eof) prot->splice_eof(sock); } EXPORT_SYMBOL_GPL(inet_splice_eof); INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *, size_t, int, int *)); int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; int addr_len = 0; int err; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg, sk, msg, size, flags, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; } EXPORT_SYMBOL(inet_recvmsg); int inet_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; int err = 0; /* This should really check to make sure * the socket is a TCP socket. (WHY AC...) */ how++; /* maps 0->1 has the advantage of making bit 1 rcvs and 1->2 bit 2 snds. 2->3 */ if ((how & ~SHUTDOWN_MASK) || !how) /* MAXINT->0 */ return -EINVAL; lock_sock(sk); if (sock->state == SS_CONNECTING) { if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)) sock->state = SS_DISCONNECTING; else sock->state = SS_CONNECTED; } switch (sk->sk_state) { case TCP_CLOSE: err = -ENOTCONN; /* Hack to wake up other listeners, who can poll for EPOLLHUP, even on eg. unconnected UDP sockets -- RR */ fallthrough; default: WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | how); if (sk->sk_prot->shutdown) sk->sk_prot->shutdown(sk, how); break; /* Remaining two branches are temporary solution for missing * close() in multithreaded environment. It is _not_ a good idea, * but we have no choice until close() is repaired at VFS level. */ case TCP_LISTEN: if (!(how & RCV_SHUTDOWN)) break; fallthrough; case TCP_SYN_SENT: err = sk->sk_prot->disconnect(sk, O_NONBLOCK); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; break; } /* Wake up anyone sleeping in poll. 
*/ sk->sk_state_change(sk); release_sock(sk); return err; } EXPORT_SYMBOL(inet_shutdown); /* * ioctl() calls you can issue on an INET socket. Most of these are * device configuration and stuff and very rarely used. Some ioctls * pass on to the socket itself. * * NOTE: I like the idea of a module for the config stuff. ie ifconfig * loads the devconfigure module does its configuring and unloads it. * There's a good 20K of config code hanging around the kernel. */ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; int err = 0; struct net *net = sock_net(sk); void __user *p = (void __user *)arg; struct ifreq ifr; struct rtentry rt; switch (cmd) { case SIOCADDRT: case SIOCDELRT: if (copy_from_user(&rt, p, sizeof(struct rtentry))) return -EFAULT; err = ip_rt_ioctl(net, cmd, &rt); break; case SIOCRTMSG: err = -EINVAL; break; case SIOCDARP: case SIOCGARP: case SIOCSARP: err = arp_ioctl(net, cmd, (void __user *)arg); break; case SIOCGIFADDR: case SIOCGIFBRDADDR: case SIOCGIFNETMASK: case SIOCGIFDSTADDR: case SIOCGIFPFLAGS: if (get_user_ifreq(&ifr, NULL, p)) return -EFAULT; err = devinet_ioctl(net, cmd, &ifr); if (!err && put_user_ifreq(&ifr, p)) err = -EFAULT; break; case SIOCSIFADDR: case SIOCSIFBRDADDR: case SIOCSIFNETMASK: case SIOCSIFDSTADDR: case SIOCSIFPFLAGS: case SIOCSIFFLAGS: if (get_user_ifreq(&ifr, NULL, p)) return -EFAULT; err = devinet_ioctl(net, cmd, &ifr); break; default: if (sk->sk_prot->ioctl) err = sk_ioctl(sk, cmd, (void __user *)arg); else err = -ENOIOCTLCMD; break; } return err; } EXPORT_SYMBOL(inet_ioctl); #ifdef CONFIG_COMPAT static int inet_compat_routing_ioctl(struct sock *sk, unsigned int cmd, struct compat_rtentry __user *ur) { compat_uptr_t rtdev; struct rtentry rt; if (copy_from_user(&rt.rt_dst, &ur->rt_dst, 3 * sizeof(struct sockaddr)) || get_user(rt.rt_flags, &ur->rt_flags) || get_user(rt.rt_metric, &ur->rt_metric) || get_user(rt.rt_mtu, &ur->rt_mtu) || get_user(rt.rt_window, &ur->rt_window) || get_user(rt.rt_irtt, &ur->rt_irtt) || get_user(rtdev, &ur->rt_dev)) return -EFAULT; rt.rt_dev = compat_ptr(rtdev); return ip_rt_ioctl(sock_net(sk), cmd, &rt); } static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = compat_ptr(arg); struct sock *sk = sock->sk; switch (cmd) { case SIOCADDRT: case SIOCDELRT: return inet_compat_routing_ioctl(sk, cmd, argp); default: if (!sk->sk_prot->compat_ioctl) return -ENOIOCTLCMD; return sk->sk_prot->compat_ioctl(sk, cmd, arg); } } #endif /* CONFIG_COMPAT */ const struct proto_ops inet_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, .bind = inet_bind, .connect = inet_stream_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, .poll = tcp_poll, .ioctl = inet_ioctl, .gettstamp = sock_gettstamp, .listen = inet_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, #ifdef CONFIG_MMU .mmap = tcp_mmap, #endif .splice_eof = inet_splice_eof, .splice_read = tcp_splice_read, .set_peek_off = sk_set_peek_off, .read_sock = tcp_read_sock, .read_skb = tcp_read_skb, .sendmsg_locked = tcp_sendmsg_locked, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_ioctl = inet_compat_ioctl, #endif .set_rcvlowat = tcp_set_rcvlowat, }; EXPORT_SYMBOL(inet_stream_ops); const struct proto_ops inet_dgram_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, .bind = 
inet_bind, .connect = inet_dgram_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = inet_getname, .poll = udp_poll, .ioctl = inet_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .read_skb = udp_read_skb, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .splice_eof = inet_splice_eof, .set_peek_off = udp_set_peek_off, #ifdef CONFIG_COMPAT .compat_ioctl = inet_compat_ioctl, #endif }; EXPORT_SYMBOL(inet_dgram_ops); /* * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without * udp_poll */ static const struct proto_ops inet_sockraw_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, .bind = inet_bind, .connect = inet_dgram_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = inet_getname, .poll = datagram_poll, .ioctl = inet_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .splice_eof = inet_splice_eof, #ifdef CONFIG_COMPAT .compat_ioctl = inet_compat_ioctl, #endif }; static const struct net_proto_family inet_family_ops = { .family = PF_INET, .create = inet_create, .owner = THIS_MODULE, }; /* Upon startup we insert all the elements in inetsw_array[] into * the linked list inetsw. */ static struct inet_protosw inetsw_array[] = { { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, .prot = &tcp_prot, .ops = &inet_stream_ops, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }, { .type = SOCK_DGRAM, .protocol = IPPROTO_UDP, .prot = &udp_prot, .ops = &inet_dgram_ops, .flags = INET_PROTOSW_PERMANENT, }, { .type = SOCK_DGRAM, .protocol = IPPROTO_ICMP, .prot = &ping_prot, .ops = &inet_sockraw_ops, .flags = INET_PROTOSW_REUSE, }, { .type = SOCK_RAW, .protocol = IPPROTO_IP, /* wild card */ .prot = &raw_prot, .ops = &inet_sockraw_ops, .flags = INET_PROTOSW_REUSE, } }; #define INETSW_ARRAY_LEN ARRAY_SIZE(inetsw_array) void inet_register_protosw(struct inet_protosw *p) { struct list_head *lh; struct inet_protosw *answer; int protocol = p->protocol; struct list_head *last_perm; spin_lock_bh(&inetsw_lock); if (p->type >= SOCK_MAX) goto out_illegal; /* If we are trying to override a permanent protocol, bail. */ last_perm = &inetsw[p->type]; list_for_each(lh, &inetsw[p->type]) { answer = list_entry(lh, struct inet_protosw, list); /* Check only the non-wild match. */ if ((INET_PROTOSW_PERMANENT & answer->flags) == 0) break; if (protocol == answer->protocol) goto out_permanent; last_perm = lh; } /* Add the new entry after the last permanent entry if any, so that * the new entry does not override a permanent entry when matched with * a wild-card protocol. But it is allowed to override any existing * non-permanent entry. This means that when we remove this entry, the * system automatically returns to the old behavior. 
*/ list_add_rcu(&p->list, last_perm); out: spin_unlock_bh(&inetsw_lock); return; out_permanent: pr_err("Attempt to override permanent protocol %d\n", protocol); goto out; out_illegal: pr_err("Ignoring attempt to register invalid socket type %d\n", p->type); goto out; } EXPORT_SYMBOL(inet_register_protosw); void inet_unregister_protosw(struct inet_protosw *p) { if (INET_PROTOSW_PERMANENT & p->flags) { pr_err("Attempt to unregister permanent protocol %d\n", p->protocol); } else { spin_lock_bh(&inetsw_lock); list_del_rcu(&p->list); spin_unlock_bh(&inetsw_lock); synchronize_net(); } } EXPORT_SYMBOL(inet_unregister_protosw); static int inet_sk_reselect_saddr(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); __be32 old_saddr = inet->inet_saddr; __be32 daddr = inet->inet_daddr; struct flowi4 *fl4; struct rtable *rt; __be32 new_saddr; struct ip_options_rcu *inet_opt; int err; inet_opt = rcu_dereference_protected(inet->inet_opt, lockdep_sock_is_held(sk)); if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; /* Query new route. */ fl4 = &inet->cork.fl.u.ip4; rt = ip_route_connect(fl4, daddr, 0, sk->sk_bound_dev_if, sk->sk_protocol, inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) return PTR_ERR(rt); new_saddr = fl4->saddr; if (new_saddr == old_saddr) { sk_setup_caps(sk, &rt->dst); return 0; } err = inet_bhash2_update_saddr(sk, &new_saddr, AF_INET); if (err) { ip_rt_put(rt); return err; } sk_setup_caps(sk, &rt->dst); if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) { pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", __func__, &old_saddr, &new_saddr); } /* * XXX The only one ugly spot where we need to * XXX really change the sockets identity after * XXX it has entered the hashes. -DaveM * * Besides that, it does not check for connection * uniqueness. Wait for troubles. */ return __sk_prot_rehash(sk); } int inet_sk_rebuild_header(struct sock *sk) { struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0)); struct inet_sock *inet = inet_sk(sk); struct flowi4 *fl4; int err; /* Route is OK, nothing to do. */ if (rt) return 0; /* Reroute. */ fl4 = &inet->cork.fl.u.ip4; inet_sk_init_flowi4(inet, fl4); rt = ip_route_output_flow(sock_net(sk), fl4, sk); if (!IS_ERR(rt)) { err = 0; sk_setup_caps(sk, &rt->dst); } else { err = PTR_ERR(rt); /* Routing failed... 
*/ sk->sk_route_caps = 0; if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) WRITE_ONCE(sk->sk_err_soft, -err); } return err; } EXPORT_SYMBOL(inet_sk_rebuild_header); void inet_sk_set_state(struct sock *sk, int state) { trace_inet_sock_set_state(sk, sk->sk_state, state); sk->sk_state = state; } EXPORT_SYMBOL(inet_sk_set_state); void inet_sk_state_store(struct sock *sk, int newstate) { trace_inet_sock_set_state(sk, sk->sk_state, newstate); smp_store_release(&sk->sk_state, newstate); } struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { bool udpfrag = false, fixedid = false, gso_partial, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; unsigned int offset = 0; struct iphdr *iph; int proto, tot_len; int nhoff; int ihl; int id; skb_reset_network_header(skb); nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) goto out; iph = ip_hdr(skb); ihl = iph->ihl * 4; if (ihl < sizeof(*iph)) goto out; id = ntohs(iph->id); proto = iph->protocol; /* Warning: after this point, iph might be no longer valid */ if (unlikely(!pskb_may_pull(skb, ihl))) goto out; __skb_pull(skb, ihl); encap = SKB_GSO_CB(skb)->encap_level > 0; if (encap) features &= skb->dev->hw_enc_features; SKB_GSO_CB(skb)->encap_level += ihl; skb_reset_transport_header(skb); segs = ERR_PTR(-EPROTONOSUPPORT); if (!skb->encapsulation || encap) { udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); /* fixed ID is invalid if DF bit is not set */ if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF))) goto out; } ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { segs = ops->callbacks.gso_segment(skb, features); if (!segs) skb->network_header = skb_mac_header(skb) + nhoff - skb->head; } if (IS_ERR_OR_NULL(segs)) goto out; gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); if (udpfrag) { iph->frag_off = htons(offset >> 3); if (skb->next) iph->frag_off |= htons(IP_MF); offset += skb->len - nhoff - ihl; tot_len = skb->len - nhoff; } else if (skb_is_gso(skb)) { if (!fixedid) { iph->id = htons(id); id += skb_shinfo(skb)->gso_segs; } if (gso_partial) tot_len = skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)iph; else tot_len = skb->len - nhoff; } else { if (!fixedid) iph->id = htons(id++); tot_len = skb->len - nhoff; } iph->tot_len = htons(tot_len); ip_send_check(iph); if (encap) skb_reset_inner_headers(skb); skb->network_header = (u8 *)iph - skb->head; skb_reset_mac_len(skb); } while ((skb = skb->next)); out: return segs; } static struct sk_buff *ipip_gso_segment(struct sk_buff *skb, netdev_features_t features) { if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4)) return ERR_PTR(-EINVAL); return inet_gso_segment(skb, features); } struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) { const struct net_offload *ops; struct sk_buff *pp = NULL; const struct iphdr *iph; struct sk_buff *p; unsigned int hlen; unsigned int off; int flush = 1; int proto; off = skb_gro_offset(skb); hlen = off + sizeof(*iph); iph = skb_gro_header(skb, hlen, off); if (unlikely(!iph)) goto out; proto = iph->protocol; ops = rcu_dereference(inet_offloads[proto]); if (!ops || !ops->callbacks.gro_receive) 
goto out; if (*(u8 *)iph != 0x45) goto out; if (ip_is_fragment(iph)) goto out; if (unlikely(ip_fast_csum((u8 *)iph, 5))) goto out; NAPI_GRO_CB(skb)->proto = proto; flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (ntohl(*(__be32 *)&iph->id) & ~IP_DF)); list_for_each_entry(p, head, list) { struct iphdr *iph2; if (!NAPI_GRO_CB(p)->same_flow) continue; iph2 = (struct iphdr *)(p->data + off); /* The above works because, with the exception of the top * (inner most) layer, we only aggregate pkts with the same * hdr length so all the hdrs we'll need to verify will start * at the same offset. */ if ((iph->protocol ^ iph2->protocol) | ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } } NAPI_GRO_CB(skb)->flush |= flush; NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = off; /* Note : No need to call skb_gro_postpull_rcsum() here, * as we already checked checksum over ipv4 header was 0 */ skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive, ops->callbacks.gro_receive, head, skb); out: skb_gro_flush_final(skb, pp, flush); return pp; } static struct sk_buff *ipip_gro_receive(struct list_head *head, struct sk_buff *skb) { if (NAPI_GRO_CB(skb)->encap_mark) { NAPI_GRO_CB(skb)->flush = 1; return NULL; } NAPI_GRO_CB(skb)->encap_mark = 1; return inet_gro_receive(head, skb); } #define SECONDS_PER_DAY 86400 /* inet_current_timestamp - Return IP network timestamp * * Return milliseconds since midnight in network byte order. */ __be32 inet_current_timestamp(void) { u32 secs; u32 msecs; struct timespec64 ts; ktime_get_real_ts64(&ts); /* Get secs since midnight. */ (void)div_u64_rem(ts.tv_sec, SECONDS_PER_DAY, &secs); /* Convert to msecs. */ msecs = secs * MSEC_PER_SEC; /* Convert nsec to msec. */ msecs += (u32)ts.tv_nsec / NSEC_PER_MSEC; /* Convert to network byte order. */ return htonl(msecs); } EXPORT_SYMBOL(inet_current_timestamp); int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { unsigned int family = READ_ONCE(sk->sk_family); if (family == AF_INET) return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) if (family == AF_INET6) return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); #endif return -EINVAL; } EXPORT_SYMBOL(inet_recv_error); int inet_gro_complete(struct sk_buff *skb, int nhoff) { struct iphdr *iph = (struct iphdr *)(skb->data + nhoff); const struct net_offload *ops; __be16 totlen = iph->tot_len; int proto = iph->protocol; int err = -ENOSYS; if (skb->encapsulation) { skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP)); skb_set_inner_network_header(skb, nhoff); } iph_set_totlen(iph, skb->len - nhoff); csum_replace2(&iph->check, totlen, iph->tot_len); ops = rcu_dereference(inet_offloads[proto]); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out; /* Only need to add sizeof(*iph) to get to the next hdr below * because any hdr with option will have been flushed in * inet_gro_receive(). 
*/ err = INDIRECT_CALL_2(ops->callbacks.gro_complete, tcp4_gro_complete, udp4_gro_complete, skb, nhoff + sizeof(*iph)); out: return err; } static int ipip_gro_complete(struct sk_buff *skb, int nhoff) { skb->encapsulation = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4; return inet_gro_complete(skb, nhoff); } int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net) { struct socket *sock; int rc = sock_create_kern(net, family, type, protocol, &sock); if (rc == 0) { *sk = sock->sk; (*sk)->sk_allocation = GFP_ATOMIC; (*sk)->sk_use_task_frag = false; /* * Unhash it so that IP input processing does not even see it, * we do not wish this socket to see incoming packets. */ (*sk)->sk_prot->unhash(*sk); } return rc; } EXPORT_SYMBOL_GPL(inet_ctl_sock_create); unsigned long snmp_fold_field(void __percpu *mib, int offt) { unsigned long res = 0; int i; for_each_possible_cpu(i) res += snmp_get_cpu_field(mib, i, offt); return res; } EXPORT_SYMBOL_GPL(snmp_fold_field); #if BITS_PER_LONG==32 u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offt, size_t syncp_offset) { void *bhptr; struct u64_stats_sync *syncp; u64 v; unsigned int start; bhptr = per_cpu_ptr(mib, cpu); syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); do { start = u64_stats_fetch_begin(syncp); v = *(((u64 *)bhptr) + offt); } while (u64_stats_fetch_retry(syncp, start)); return v; } EXPORT_SYMBOL_GPL(snmp_get_cpu_field64); u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset) { u64 res = 0; int cpu; for_each_possible_cpu(cpu) { res += snmp_get_cpu_field64(mib, cpu, offt, syncp_offset); } return res; } EXPORT_SYMBOL_GPL(snmp_fold_field64); #endif #ifdef CONFIG_IP_MULTICAST static const struct net_protocol igmp_protocol = { .handler = igmp_rcv, }; #endif static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .err_handler = icmp_err, .no_policy = 1, }; static __net_init int ipv4_mib_init_net(struct net *net) { int i; net->mib.tcp_statistics = alloc_percpu(struct tcp_mib); if (!net->mib.tcp_statistics) goto err_tcp_mib; net->mib.ip_statistics = alloc_percpu(struct ipstats_mib); if (!net->mib.ip_statistics) goto err_ip_mib; for_each_possible_cpu(i) { struct ipstats_mib *af_inet_stats; af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i); u64_stats_init(&af_inet_stats->syncp); } net->mib.net_statistics = alloc_percpu(struct linux_mib); if (!net->mib.net_statistics) goto err_net_mib; net->mib.udp_statistics = alloc_percpu(struct udp_mib); if (!net->mib.udp_statistics) goto err_udp_mib; net->mib.udplite_statistics = alloc_percpu(struct udp_mib); if (!net->mib.udplite_statistics) goto err_udplite_mib; net->mib.icmp_statistics = alloc_percpu(struct icmp_mib); if (!net->mib.icmp_statistics) goto err_icmp_mib; net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib), GFP_KERNEL); if (!net->mib.icmpmsg_statistics) goto err_icmpmsg_mib; tcp_mib_init(net); return 0; err_icmpmsg_mib: free_percpu(net->mib.icmp_statistics); err_icmp_mib: free_percpu(net->mib.udplite_statistics); err_udplite_mib: free_percpu(net->mib.udp_statistics); err_udp_mib: free_percpu(net->mib.net_statistics); err_net_mib: free_percpu(net->mib.ip_statistics); err_ip_mib: free_percpu(net->mib.tcp_statistics); err_tcp_mib: return -ENOMEM; } static __net_exit void ipv4_mib_exit_net(struct net *net) { kfree(net->mib.icmpmsg_statistics); free_percpu(net->mib.icmp_statistics); free_percpu(net->mib.udplite_statistics); free_percpu(net->mib.udp_statistics); 
free_percpu(net->mib.net_statistics); free_percpu(net->mib.ip_statistics); free_percpu(net->mib.tcp_statistics); #ifdef CONFIG_MPTCP /* allocated on demand, see mptcp_init_sock() */ free_percpu(net->mib.mptcp_statistics); #endif } static __net_initdata struct pernet_operations ipv4_mib_ops = { .init = ipv4_mib_init_net, .exit = ipv4_mib_exit_net, }; static int __init init_ipv4_mibs(void) { return register_pernet_subsys(&ipv4_mib_ops); } static __net_init int inet_init_net(struct net *net) { /* * Set defaults for local port range */ net->ipv4.ip_local_ports.range = 60999u << 16 | 32768u; seqlock_init(&net->ipv4.ping_group_range.lock); /* * Sane defaults - nobody may create ping sockets. * Boot scripts should set this to distro-specific group. */ net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1); net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0); /* Default values for sysctl-controlled parameters. * We set them here, in case sysctl is not compiled. */ net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; net->ipv4.sysctl_ip_fwd_update_priority = 1; net->ipv4.sysctl_ip_dynaddr = 0; net->ipv4.sysctl_ip_early_demux = 1; net->ipv4.sysctl_udp_early_demux = 1; net->ipv4.sysctl_tcp_early_demux = 1; net->ipv4.sysctl_nexthop_compat_mode = 1; #ifdef CONFIG_SYSCTL net->ipv4.sysctl_ip_prot_sock = PROT_SOCK; #endif /* Some igmp sysctl, whose values are always used */ net->ipv4.sysctl_igmp_max_memberships = 20; net->ipv4.sysctl_igmp_max_msf = 10; /* IGMP reports for link-local multicast groups are enabled by default */ net->ipv4.sysctl_igmp_llm_reports = 1; net->ipv4.sysctl_igmp_qrv = 2; net->ipv4.sysctl_fib_notify_on_flag_change = 0; return 0; } static __net_initdata struct pernet_operations af_inet_ops = { .init = inet_init_net, }; static int __init init_inet_pernet_ops(void) { return register_pernet_subsys(&af_inet_ops); } static int ipv4_proc_init(void); /* * IP protocol layer initialiser */ static const struct net_offload ipip_offload = { .callbacks = { .gso_segment = ipip_gso_segment, .gro_receive = ipip_gro_receive, .gro_complete = ipip_gro_complete, }, }; static int __init ipip_offload_init(void) { return inet_add_offload(&ipip_offload, IPPROTO_IPIP); } static int __init ipv4_offload_init(void) { /* * Add offloads */ if (udpv4_offload_init() < 0) pr_crit("%s: Cannot add UDP protocol offload\n", __func__); if (tcpv4_offload_init() < 0) pr_crit("%s: Cannot add TCP protocol offload\n", __func__); if (ipip_offload_init() < 0) pr_crit("%s: Cannot add IPIP protocol offload\n", __func__); net_hotdata.ip_packet_offload = (struct packet_offload) { .type = cpu_to_be16(ETH_P_IP), .callbacks = { .gso_segment = inet_gso_segment, .gro_receive = inet_gro_receive, .gro_complete = inet_gro_complete, }, }; dev_add_offload(&net_hotdata.ip_packet_offload); return 0; } fs_initcall(ipv4_offload_init); static struct packet_type ip_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IP), .func = ip_rcv, .list_func = ip_list_rcv, }; static int __init inet_init(void) { struct inet_protosw *q; struct list_head *r; int rc; sock_skb_cb_check_size(sizeof(struct inet_skb_parm)); raw_hashinfo_init(&raw_v4_hashinfo); rc = proto_register(&tcp_prot, 1); if (rc) goto out; rc = proto_register(&udp_prot, 1); if (rc) goto out_unregister_tcp_proto; rc = proto_register(&raw_prot, 1); if (rc) goto out_unregister_udp_proto; rc = proto_register(&ping_prot, 1); if (rc) goto out_unregister_raw_proto; /* * Tell SOCKET that we are alive... 
 */
	(void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
	ip_static_sysctl_init();
#endif

	/*
	 * Add all the base protocols.
	 */
	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		pr_crit("%s: Cannot add ICMP protocol\n", __func__);

	net_hotdata.udp_protocol = (struct net_protocol) {
		.handler	= udp_rcv,
		.err_handler	= udp_err,
		.no_policy	= 1,
	};
	if (inet_add_protocol(&net_hotdata.udp_protocol, IPPROTO_UDP) < 0)
		pr_crit("%s: Cannot add UDP protocol\n", __func__);

	net_hotdata.tcp_protocol = (struct net_protocol) {
		.handler	= tcp_v4_rcv,
		.err_handler	= tcp_v4_err,
		.no_policy	= 1,
		.icmp_strict_tag_validation = 1,
	};
	if (inet_add_protocol(&net_hotdata.tcp_protocol, IPPROTO_TCP) < 0)
		pr_crit("%s: Cannot add TCP protocol\n", __func__);
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		pr_crit("%s: Cannot add IGMP protocol\n", __func__);
#endif

	/* Register the socket-side information for inet_create. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 * Set the ARP module up
	 */
	arp_init();

	/*
	 * Set the IP module up
	 */
	ip_init();

	/* Initialise per-cpu ipv4 mibs */
	if (init_ipv4_mibs())
		panic("%s: Cannot init ipv4 mibs\n", __func__);

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/* Setup UDP memory threshold */
	udp_init();

	/* Add UDP-Lite (RFC 3828) */
	udplite4_register();

	raw_init();

	ping_init();

	/*
	 * Set the ICMP layer up
	 */
	if (icmp_init() < 0)
		panic("Failed to create the ICMP control socket.\n");

	/*
	 * Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
#endif

	if (init_inet_pernet_ops())
		pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);

	ipv4_proc_init();

	ipfrag_init();

	dev_add_pack(&ip_packet_type);

	ip_tunnel_core_init();

	rc = 0;
out:
	return rc;
out_unregister_raw_proto:
	proto_unregister(&raw_prot);
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
	goto out;
}

fs_initcall(inet_init);

/* ------------------------------------------------------------------------ */

#ifdef CONFIG_PROC_FS
static int __init ipv4_proc_init(void)
{
	int rc = 0;

	if (raw_proc_init())
		goto out_raw;
	if (tcp4_proc_init())
		goto out_tcp;
	if (udp4_proc_init())
		goto out_udp;
	if (ping_proc_init())
		goto out_ping;
	if (ip_misc_proc_init())
		goto out_misc;
out:
	return rc;
out_misc:
	ping_proc_exit();
out_ping:
	udp4_proc_exit();
out_udp:
	tcp4_proc_exit();
out_tcp:
	raw_proc_exit();
out_raw:
	rc = -ENOMEM;
	goto out;
}
#else /* CONFIG_PROC_FS */
static int __init ipv4_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */
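/*
 * Editor's illustration (not part of af_inet.c): a minimal userspace TCP
 * server whose syscalls land in the handlers above via inet_stream_ops.
 * socket() reaches inet_create() -- protocol 0 is the IPPROTO_IP wildcard
 * resolved against inetsw[] -- bind() reaches __inet_bind(), listen()
 * reaches __inet_listen_sk(), accept() reaches inet_accept() and
 * shutdown() reaches inet_shutdown(). Error handling is abbreviated; the
 * port number is arbitrary.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(8080),
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	int one = 1, fd, conn;

	fd = socket(AF_INET, SOCK_STREAM, 0);	/* inet_create() */
	if (fd < 0)
		return 1;
	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;			/* __inet_bind() path */
	if (listen(fd, 128) < 0)		/* __inet_listen_sk() */
		return 1;
	conn = accept(fd, NULL, NULL);		/* inet_accept() */
	if (conn >= 0) {
		/* SHUT_RDWR == 2; the kernel's how++ maps it to 3 */
		shutdown(conn, SHUT_RDWR);	/* inet_shutdown() */
		close(conn);			/* inet_release() */
	}
	close(fd);
	return 0;
}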
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
*/ #include <rdma/ib_verbs.h> #include <rdma/rdma_counter.h> #include "core_priv.h" #include "restrack.h" #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID) static int __counter_set_mode(struct rdma_port_counter *port_counter, enum rdma_nl_counter_mode new_mode, enum rdma_nl_counter_mask new_mask, bool bind_opcnt) { if (new_mode == RDMA_COUNTER_MODE_AUTO) { if (new_mask & (~ALL_AUTO_MODE_MASKS)) return -EINVAL; if (port_counter->num_counters) return -EBUSY; } port_counter->mode.mode = new_mode; port_counter->mode.mask = new_mask; port_counter->mode.bind_opcnt = bind_opcnt; return 0; } /* * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode * * @dev: Device to operate * @port: Port to use * @mask: Mask to configure * @extack: Message to the user * * Return 0 on success. If counter mode wasn't changed then it is considered * as success as well. * Return -EBUSY when changing to auto mode while there are bounded counters. * */ int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port, enum rdma_nl_counter_mask mask, bool bind_opcnt, struct netlink_ext_ack *extack) { struct rdma_port_counter *port_counter; enum rdma_nl_counter_mode mode; int ret; port_counter = &dev->port_data[port].port_counter; if (!port_counter->hstats) return -EOPNOTSUPP; mutex_lock(&port_counter->lock); if (mask) mode = RDMA_COUNTER_MODE_AUTO; else mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL : RDMA_COUNTER_MODE_NONE; if (port_counter->mode.mode == mode && port_counter->mode.mask == mask && port_counter->mode.bind_opcnt == bind_opcnt) { ret = 0; goto out; } ret = __counter_set_mode(port_counter, mode, mask, bind_opcnt); out: mutex_unlock(&port_counter->lock); if (ret == -EBUSY) NL_SET_ERR_MSG( extack, "Modifying auto mode is not allowed when there is a bound QP"); return ret; } static void auto_mode_init_counter(struct rdma_counter *counter, const struct ib_qp *qp, enum rdma_nl_counter_mask new_mask) { struct auto_mode_param *param = &counter->mode.param; counter->mode.mode = RDMA_COUNTER_MODE_AUTO; counter->mode.mask = new_mask; if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) param->qp_type = qp->qp_type; } static int __rdma_counter_bind_qp(struct rdma_counter *counter, struct ib_qp *qp, u32 port) { int ret; if (qp->counter) return -EINVAL; if (!qp->device->ops.counter_bind_qp) return -EOPNOTSUPP; mutex_lock(&counter->lock); ret = qp->device->ops.counter_bind_qp(counter, qp, port); mutex_unlock(&counter->lock); return ret; } int rdma_counter_modify(struct ib_device *dev, u32 port, unsigned int index, bool enable) { struct rdma_hw_stats *stats; int ret = 0; if (!dev->ops.modify_hw_stat) return -EOPNOTSUPP; stats = ib_get_hw_stats_port(dev, port); if (!stats || index >= stats->num_counters || !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) return -EINVAL; mutex_lock(&stats->lock); if (enable != test_bit(index, stats->is_disabled)) goto out; ret = dev->ops.modify_hw_stat(dev, port, index, enable); if (ret) goto out; if (enable) clear_bit(index, stats->is_disabled); else set_bit(index, stats->is_disabled); out: mutex_unlock(&stats->lock); return ret; } static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, struct ib_qp *qp, enum rdma_nl_counter_mode mode, bool bind_opcnt) { struct rdma_port_counter *port_counter; struct rdma_counter *counter; int ret; if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats) return NULL; counter = rdma_zalloc_drv_obj(dev, rdma_counter); if (!counter) return NULL; counter->device = dev; counter->port = 
port; dev->ops.counter_init(counter); rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER); counter->stats = dev->ops.counter_alloc_stats(counter); if (!counter->stats) goto err_stats; port_counter = &dev->port_data[port].port_counter; mutex_lock(&port_counter->lock); switch (mode) { case RDMA_COUNTER_MODE_MANUAL: ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL, 0, bind_opcnt); if (ret) { mutex_unlock(&port_counter->lock); goto err_mode; } break; case RDMA_COUNTER_MODE_AUTO: auto_mode_init_counter(counter, qp, port_counter->mode.mask); break; default: ret = -EOPNOTSUPP; mutex_unlock(&port_counter->lock); goto err_mode; } port_counter->num_counters++; mutex_unlock(&port_counter->lock); counter->mode.mode = mode; counter->mode.bind_opcnt = bind_opcnt; kref_init(&counter->kref); mutex_init(&counter->lock); ret = __rdma_counter_bind_qp(counter, qp, port); if (ret) goto err_mode; rdma_restrack_parent_name(&counter->res, &qp->res); rdma_restrack_add(&counter->res); return counter; err_mode: rdma_free_hw_stats_struct(counter->stats); err_stats: rdma_restrack_put(&counter->res); kfree(counter); return NULL; } static void rdma_counter_free(struct rdma_counter *counter) { struct rdma_port_counter *port_counter; port_counter = &counter->device->port_data[counter->port].port_counter; mutex_lock(&port_counter->lock); port_counter->num_counters--; if (!port_counter->num_counters && (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL)) __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0, false); mutex_unlock(&port_counter->lock); rdma_restrack_del(&counter->res); rdma_free_hw_stats_struct(counter->stats); kfree(counter); } static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, enum rdma_nl_counter_mask auto_mask) { struct auto_mode_param *param = &counter->mode.param; bool match = true; if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) match &= (param->qp_type == qp->qp_type); if (auto_mask & RDMA_COUNTER_MASK_PID) match &= (task_pid_nr(counter->res.task) == task_pid_nr(qp->res.task)); return match; } static int __rdma_counter_unbind_qp(struct ib_qp *qp, u32 port) { struct rdma_counter *counter = qp->counter; int ret; if (!qp->device->ops.counter_unbind_qp) return -EOPNOTSUPP; mutex_lock(&counter->lock); ret = qp->device->ops.counter_unbind_qp(qp, port); mutex_unlock(&counter->lock); return ret; } static void counter_history_stat_update(struct rdma_counter *counter) { struct ib_device *dev = counter->device; struct rdma_port_counter *port_counter; int i; port_counter = &dev->port_data[counter->port].port_counter; if (!port_counter->hstats) return; rdma_counter_query_stats(counter); for (i = 0; i < counter->stats->num_counters; i++) port_counter->hstats->value[i] += counter->stats->value[i]; } /* * rdma_get_counter_auto_mode - Find the counter that @qp should be bound * with in auto mode * * Return: The counter (with ref-count increased) if found */ static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, u32 port) { struct rdma_port_counter *port_counter; struct rdma_counter *counter = NULL; struct ib_device *dev = qp->device; struct rdma_restrack_entry *res; struct rdma_restrack_root *rt; unsigned long id = 0; port_counter = &dev->port_data[port].port_counter; rt = &dev->res[RDMA_RESTRACK_COUNTER]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { counter = container_of(res, struct rdma_counter, res); if ((counter->device != qp->device) || (counter->port != port)) goto next; if (auto_mode_match(qp, counter, port_counter->mode.mask)) break; next: 
counter = NULL; } if (counter && !kref_get_unless_zero(&counter->kref)) counter = NULL; xa_unlock(&rt->xa); return counter; } static void counter_release(struct kref *kref) { struct rdma_counter *counter; counter = container_of(kref, struct rdma_counter, kref); counter_history_stat_update(counter); counter->device->ops.counter_dealloc(counter); rdma_counter_free(counter); } /* * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on * the auto-mode rule */ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port) { struct rdma_port_counter *port_counter; struct ib_device *dev = qp->device; struct rdma_counter *counter; int ret; if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res)) return 0; if (!rdma_is_port_valid(dev, port)) return -EINVAL; port_counter = &dev->port_data[port].port_counter; if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) return 0; counter = rdma_get_counter_auto_mode(qp, port); if (counter) { ret = __rdma_counter_bind_qp(counter, qp, port); if (ret) { kref_put(&counter->kref, counter_release); return ret; } } else { counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO, port_counter->mode.bind_opcnt); if (!counter) return -ENOMEM; } return 0; } /* * rdma_counter_unbind_qp - Unbind a qp from a counter * @force: * true - Decrease the counter ref-count anyway (e.g., qp destroy) */ int rdma_counter_unbind_qp(struct ib_qp *qp, u32 port, bool force) { struct rdma_counter *counter = qp->counter; int ret; if (!counter) return -EINVAL; ret = __rdma_counter_unbind_qp(qp, port); if (ret && !force) return ret; kref_put(&counter->kref, counter_release); return 0; } int rdma_counter_query_stats(struct rdma_counter *counter) { struct ib_device *dev = counter->device; int ret; if (!dev->ops.counter_update_stats) return -EINVAL; mutex_lock(&counter->lock); ret = dev->ops.counter_update_stats(counter); mutex_unlock(&counter->lock); return ret; } static u64 get_running_counters_hwstat_sum(struct ib_device *dev, u32 port, u32 index) { struct rdma_restrack_entry *res; struct rdma_restrack_root *rt; struct rdma_counter *counter; unsigned long id = 0; u64 sum = 0; rt = &dev->res[RDMA_RESTRACK_COUNTER]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { if (!rdma_restrack_get(res)) continue; xa_unlock(&rt->xa); counter = container_of(res, struct rdma_counter, res); if ((counter->device != dev) || (counter->port != port) || rdma_counter_query_stats(counter)) goto next; sum += counter->stats->value[index]; next: xa_lock(&rt->xa); rdma_restrack_put(res); } xa_unlock(&rt->xa); return sum; } /* * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a * specific port, including the running ones and history data */ u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index) { struct rdma_port_counter *port_counter; u64 sum; port_counter = &dev->port_data[port].port_counter; if (!port_counter->hstats) return 0; sum = get_running_counters_hwstat_sum(dev, port, index); sum += port_counter->hstats->value[index]; return sum; } static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) { struct rdma_restrack_entry *res = NULL; struct ib_qp *qp = NULL; res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num); if (IS_ERR(res)) return NULL; qp = container_of(res, struct ib_qp, res); if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) goto err; return qp; err: rdma_restrack_put(res); return NULL; } static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, u32 counter_id) { struct 
rdma_restrack_entry *res; struct rdma_counter *counter; res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id); if (IS_ERR(res)) return NULL; counter = container_of(res, struct rdma_counter, res); kref_get(&counter->kref); rdma_restrack_put(res); return counter; } /* * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id */ int rdma_counter_bind_qpn(struct ib_device *dev, u32 port, u32 qp_num, u32 counter_id) { struct rdma_port_counter *port_counter; struct rdma_counter *counter; struct ib_qp *qp; int ret; port_counter = &dev->port_data[port].port_counter; if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) return -EINVAL; qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; counter = rdma_get_counter_by_id(dev, counter_id); if (!counter) { ret = -ENOENT; goto err; } if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) { ret = -EINVAL; goto err_task; } if ((counter->device != qp->device) || (counter->port != qp->port)) { ret = -EINVAL; goto err_task; } ret = __rdma_counter_bind_qp(counter, qp, port); if (ret) goto err_task; rdma_restrack_put(&qp->res); return 0; err_task: kref_put(&counter->kref, counter_release); err: rdma_restrack_put(&qp->res); return ret; } /* * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it * The id of new counter is returned in @counter_id */ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port, u32 qp_num, u32 *counter_id) { struct rdma_port_counter *port_counter; struct rdma_counter *counter; struct ib_qp *qp; int ret; if (!rdma_is_port_valid(dev, port)) return -EINVAL; port_counter = &dev->port_data[port].port_counter; if (!port_counter->hstats) return -EOPNOTSUPP; if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) return -EINVAL; qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { ret = -EINVAL; goto err; } counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL, true); if (!counter) { ret = -ENOMEM; goto err; } if (counter_id) *counter_id = counter->id; rdma_restrack_put(&qp->res); return 0; err: rdma_restrack_put(&qp->res); return ret; } /* * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter */ int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port, u32 qp_num, u32 counter_id) { struct rdma_port_counter *port_counter; struct ib_qp *qp; int ret; if (!rdma_is_port_valid(dev, port)) return -EINVAL; qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { ret = -EINVAL; goto out; } port_counter = &dev->port_data[port].port_counter; if (!qp->counter || qp->counter->id != counter_id || port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) { ret = -EINVAL; goto out; } ret = rdma_counter_unbind_qp(qp, port, false); out: rdma_restrack_put(&qp->res); return ret; } int rdma_counter_get_mode(struct ib_device *dev, u32 port, enum rdma_nl_counter_mode *mode, enum rdma_nl_counter_mask *mask, bool *opcnt) { struct rdma_port_counter *port_counter; port_counter = &dev->port_data[port].port_counter; *mode = port_counter->mode.mode; *mask = port_counter->mode.mask; *opcnt = port_counter->mode.bind_opcnt; return 0; } void rdma_counter_init(struct ib_device *dev) { struct rdma_port_counter *port_counter; u32 port, i; if (!dev->port_data) return; rdma_for_each_port(dev, port) { port_counter = &dev->port_data[port].port_counter; port_counter->mode.mode = RDMA_COUNTER_MODE_NONE; mutex_init(&port_counter->lock); if 
(!dev->ops.alloc_hw_port_stats)
			continue;

		port_counter->hstats = dev->ops.alloc_hw_port_stats(dev, port);
		if (!port_counter->hstats)
			goto fail;
	}

	return;

fail:
	for (i = port; i >= rdma_start_port(dev); i--) {
		/* unwind the ports that were already set up */
		port_counter = &dev->port_data[i].port_counter;
		rdma_free_hw_stats_struct(port_counter->hstats);
		port_counter->hstats = NULL;
		mutex_destroy(&port_counter->lock);
	}
}

void rdma_counter_release(struct ib_device *dev)
{
	struct rdma_port_counter *port_counter;
	u32 port;

	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		rdma_free_hw_stats_struct(port_counter->hstats);
		mutex_destroy(&port_counter->lock);
	}
}
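The counter lookup and teardown above follow the usual kref discipline: a reference is only taken under the xarray lock with kref_get_unless_zero(), and the final kref_put() runs counter_release(), which flushes the statistics into the port history before freeing. Below is a minimal, self-contained sketch of that pattern using only the generic <linux/kref.h> API; the demo_* names are invented for illustration and are not part of counters.c.

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/* Illustrative object, not a structure from counters.c. */
struct demo_obj {
	struct kref kref;
};

static struct demo_obj *demo_table_slot;	/* a "table" with one entry */
static DEFINE_SPINLOCK(demo_table_lock);

static struct demo_obj *demo_obj_alloc(void)
{
	struct demo_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (obj)
		kref_init(&obj->kref);		/* refcount starts at 1 */
	return obj;
}

static void demo_obj_release(struct kref *kref)
{
	/* Runs exactly once, when the last reference is dropped. */
	kfree(container_of(kref, struct demo_obj, kref));
}

/* Lookup side: take a reference only while the object is still live. */
static struct demo_obj *demo_obj_lookup(void)
{
	struct demo_obj *obj;

	spin_lock(&demo_table_lock);
	obj = demo_table_slot;
	if (obj && !kref_get_unless_zero(&obj->kref))
		obj = NULL;			/* already on its way out */
	spin_unlock(&demo_table_lock);
	return obj;
}

static void demo_obj_put(struct demo_obj *obj)
{
	kref_put(&obj->kref, demo_obj_release);
}

The property the counter code relies on is the same: a concurrent lookup can never resurrect an object whose refcount has already reached zero, so the release callback is guaranteed to run exactly once.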
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2014 Linaro Ltd
 *
 * Author: Ulf Hansson <ulf.hansson@linaro.org>
 *
 * MMC power sequence management
 */
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/mmc/host.h>

#include "pwrseq.h"

static DEFINE_MUTEX(pwrseq_list_mutex);
static LIST_HEAD(pwrseq_list);

int mmc_pwrseq_alloc(struct mmc_host *host)
{
	struct device_node *np;
	struct mmc_pwrseq *p;

	np = of_parse_phandle(host->parent->of_node, "mmc-pwrseq", 0);
	if (!np)
		return 0;

	mutex_lock(&pwrseq_list_mutex);
	list_for_each_entry(p, &pwrseq_list, pwrseq_node) {
		if (device_match_of_node(p->dev, np)) {
			if (!try_module_get(p->owner))
				dev_err(host->parent,
					"increasing module refcount failed\n");
			else
				host->pwrseq = p;
			break;
		}
	}

	of_node_put(np);
	mutex_unlock(&pwrseq_list_mutex);

	if (!host->pwrseq)
		return -EPROBE_DEFER;

	dev_info(host->parent, "allocated mmc-pwrseq\n");

	return 0;
}

void mmc_pwrseq_pre_power_on(struct mmc_host *host)
{
	struct mmc_pwrseq *pwrseq = host->pwrseq;

	if (pwrseq && pwrseq->ops->pre_power_on)
		pwrseq->ops->pre_power_on(host);
}

void mmc_pwrseq_post_power_on(struct mmc_host *host)
{
	struct mmc_pwrseq *pwrseq = host->pwrseq;

	if (pwrseq && pwrseq->ops->post_power_on)
		pwrseq->ops->post_power_on(host);
}

void mmc_pwrseq_power_off(struct mmc_host *host)
{
	struct mmc_pwrseq *pwrseq = host->pwrseq;

	if (pwrseq && pwrseq->ops->power_off)
		pwrseq->ops->power_off(host);
}

void mmc_pwrseq_reset(struct mmc_host *host)
{
	struct mmc_pwrseq *pwrseq = host->pwrseq;

	if (pwrseq && pwrseq->ops->reset)
		pwrseq->ops->reset(host);
}

void mmc_pwrseq_free(struct mmc_host *host)
{
	struct mmc_pwrseq *pwrseq = host->pwrseq;

	if (pwrseq) {
		module_put(pwrseq->owner);
		host->pwrseq = NULL;
	}
}

int mmc_pwrseq_register(struct mmc_pwrseq *pwrseq)
{
	if (!pwrseq || !pwrseq->ops || !pwrseq->dev)
		return -EINVAL;

	mutex_lock(&pwrseq_list_mutex);
	list_add(&pwrseq->pwrseq_node, &pwrseq_list);
	mutex_unlock(&pwrseq_list_mutex);

	return 0;
}
EXPORT_SYMBOL_GPL(mmc_pwrseq_register);

void mmc_pwrseq_unregister(struct mmc_pwrseq *pwrseq)
{
	if (pwrseq) {
		mutex_lock(&pwrseq_list_mutex);
		list_del(&pwrseq->pwrseq_node);
		mutex_unlock(&pwrseq_list_mutex);
	}
}
EXPORT_SYMBOL_GPL(mmc_pwrseq_unregister);
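mmc_pwrseq_register() and mmc_pwrseq_unregister() are the provider-facing half of this file: a power-sequence driver puts itself on pwrseq_list, and mmc_pwrseq_alloc() later matches it against a host's "mmc-pwrseq" phandle via device_match_of_node(). A rough sketch of a provider follows, assuming the struct mmc_pwrseq and struct mmc_pwrseq_ops declarations from pwrseq.h; the demo_* names are placeholders, not an in-tree driver.

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/mmc/host.h>

#include "pwrseq.h"

struct demo_pwrseq {
	struct mmc_pwrseq pwrseq;
};

/* Called by the core just before the host is powered up. */
static void demo_pwrseq_pre_power_on(struct mmc_host *host)
{
	/* e.g. drive reset GPIOs or enable an external regulator here */
}

static const struct mmc_pwrseq_ops demo_pwrseq_ops = {
	.pre_power_on	= demo_pwrseq_pre_power_on,
	/* .post_power_on, .power_off and .reset are optional */
};

static int demo_pwrseq_probe(struct platform_device *pdev)
{
	struct demo_pwrseq *ps;

	ps = devm_kzalloc(&pdev->dev, sizeof(*ps), GFP_KERNEL);
	if (!ps)
		return -ENOMEM;

	ps->pwrseq.ops = &demo_pwrseq_ops;
	/* dev is what device_match_of_node() compares against the phandle */
	ps->pwrseq.dev = &pdev->dev;
	ps->pwrseq.owner = THIS_MODULE;
	platform_set_drvdata(pdev, ps);

	return mmc_pwrseq_register(&ps->pwrseq);
}

static void demo_pwrseq_remove(struct platform_device *pdev)
{
	struct demo_pwrseq *ps = platform_get_drvdata(pdev);

	mmc_pwrseq_unregister(&ps->pwrseq);
}

For reference, the in-tree pwrseq_simple and pwrseq_emmc drivers are built around the same registration call, with device-tree matching and a platform_driver wrapped around probe/remove functions of this shape.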
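The remainder of this section is the fanotify syscall implementation (fanotify_user.c): fanotify_init(), fanotify_mark(), and the read/write paths that deliver events and accept permission responses. As a reminder of what the userspace side of that interface looks like, here is a small consumer loosely modelled on the fanotify(7) man page example; error handling is trimmed, and it needs CAP_SYS_ADMIN plus a kernel built with CONFIG_FANOTIFY_ACCESS_PERMISSIONS for the permission event.

#define _GNU_SOURCE	/* for O_LARGEFILE */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/fanotify.h>

int main(int argc, char *argv[])
{
	struct fanotify_event_metadata buf[64], *ev;
	ssize_t len;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <mountpoint>\n", argv[0]);
		exit(EXIT_FAILURE);
	}

	/* FAN_CLASS_CONTENT is required for the FAN_OPEN_PERM event below. */
	fd = fanotify_init(FAN_CLASS_CONTENT | FAN_CLOEXEC,
			   O_RDONLY | O_LARGEFILE);
	if (fd == -1) {
		perror("fanotify_init");
		exit(EXIT_FAILURE);
	}

	/* Watch the whole mount: notify on close-for-write, ask on open. */
	if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT,
			  FAN_OPEN_PERM | FAN_CLOSE_WRITE,
			  AT_FDCWD, argv[1]) == -1) {
		perror("fanotify_mark");
		exit(EXIT_FAILURE);
	}

	while ((len = read(fd, buf, sizeof(buf))) > 0) {
		for (ev = buf; FAN_EVENT_OK(ev, len);
		     ev = FAN_EVENT_NEXT(ev, len)) {
			if (ev->vers != FANOTIFY_METADATA_VERSION)
				exit(EXIT_FAILURE);

			if (ev->mask & FAN_OPEN_PERM) {
				/* Consumed by fanotify_write() below via
				 * process_access_response(). */
				struct fanotify_response resp = {
					.fd = ev->fd,
					.response = FAN_ALLOW,
				};
				write(fd, &resp, sizeof(resp));
			}

			if (ev->fd >= 0)
				close(ev->fd);
		}
	}
	return 0;
}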
// SPDX-License-Identifier: GPL-2.0
#include <linux/fanotify.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <linux/sched/signal.h>
#include <linux/memcontrol.h>
#include <linux/statfs.h>
#include
<linux/exportfs.h> #include <asm/ioctls.h> #include "../fsnotify.h" #include "../fdinfo.h" #include "fanotify.h" #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 #define FANOTIFY_OLD_DEFAULT_MAX_MARKS 8192 #define FANOTIFY_DEFAULT_MAX_GROUPS 128 #define FANOTIFY_DEFAULT_FEE_POOL_SIZE 32 /* * Legacy fanotify marks limits (8192) is per group and we introduced a tunable * limit of marks per user, similar to inotify. Effectively, the legacy limit * of fanotify marks per user is <max marks per group> * <max groups per user>. * This default limit (1M) also happens to match the increased limit of inotify * max_user_watches since v5.10. */ #define FANOTIFY_DEFAULT_MAX_USER_MARKS \ (FANOTIFY_OLD_DEFAULT_MAX_MARKS * FANOTIFY_DEFAULT_MAX_GROUPS) /* * Most of the memory cost of adding an inode mark is pinning the marked inode. * The size of the filesystem inode struct is not uniform across filesystems, * so double the size of a VFS inode is used as a conservative approximation. */ #define INODE_MARK_COST (2 * sizeof(struct inode)) /* configurable via /proc/sys/fs/fanotify/ */ static int fanotify_max_queued_events __read_mostly; #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> static long ft_zero = 0; static long ft_int_max = INT_MAX; static const struct ctl_table fanotify_table[] = { { .procname = "max_user_groups", .data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS], .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra1 = &ft_zero, .extra2 = &ft_int_max, }, { .procname = "max_user_marks", .data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS], .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra1 = &ft_zero, .extra2 = &ft_int_max, }, { .procname = "max_queued_events", .data = &fanotify_max_queued_events, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO }, }; static void __init fanotify_sysctls_init(void) { register_sysctl("fs/fanotify", fanotify_table); } #else #define fanotify_sysctls_init() do { } while (0) #endif /* CONFIG_SYSCTL */ /* * All flags that may be specified in parameter event_f_flags of fanotify_init. * * Internal and external open flags are stored together in field f_flags of * struct file. Only external open flags shall be allowed in event_f_flags. * Internal flags like FMODE_EXEC shall be excluded. 
*/ #define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \ O_ACCMODE | O_APPEND | O_NONBLOCK | \ __O_SYNC | O_DSYNC | O_CLOEXEC | \ O_LARGEFILE | O_NOATIME ) extern const struct fsnotify_ops fanotify_fsnotify_ops; struct kmem_cache *fanotify_mark_cache __ro_after_init; struct kmem_cache *fanotify_fid_event_cachep __ro_after_init; struct kmem_cache *fanotify_path_event_cachep __ro_after_init; struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init; #define FANOTIFY_EVENT_ALIGN 4 #define FANOTIFY_FID_INFO_HDR_LEN \ (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle)) #define FANOTIFY_PIDFD_INFO_LEN \ sizeof(struct fanotify_event_info_pidfd) #define FANOTIFY_ERROR_INFO_LEN \ (sizeof(struct fanotify_event_info_error)) #define FANOTIFY_RANGE_INFO_LEN \ (sizeof(struct fanotify_event_info_range)) #define FANOTIFY_MNT_INFO_LEN \ (sizeof(struct fanotify_event_info_mnt)) static int fanotify_fid_info_len(int fh_len, int name_len) { int info_len = fh_len; if (name_len) info_len += name_len + 1; return roundup(FANOTIFY_FID_INFO_HDR_LEN + info_len, FANOTIFY_EVENT_ALIGN); } /* FAN_RENAME may have one or two dir+name info records */ static int fanotify_dir_name_info_len(struct fanotify_event *event) { struct fanotify_info *info = fanotify_event_info(event); int dir_fh_len = fanotify_event_dir_fh_len(event); int dir2_fh_len = fanotify_event_dir2_fh_len(event); int info_len = 0; if (dir_fh_len) info_len += fanotify_fid_info_len(dir_fh_len, info->name_len); if (dir2_fh_len) info_len += fanotify_fid_info_len(dir2_fh_len, info->name2_len); return info_len; } static size_t fanotify_event_len(unsigned int info_mode, struct fanotify_event *event) { size_t event_len = FAN_EVENT_METADATA_LEN; int fh_len; int dot_len = 0; if (fanotify_is_error_event(event->mask)) event_len += FANOTIFY_ERROR_INFO_LEN; if (fanotify_event_has_any_dir_fh(event)) { event_len += fanotify_dir_name_info_len(event); } else if ((info_mode & FAN_REPORT_NAME) && (event->mask & FAN_ONDIR)) { /* * With group flag FAN_REPORT_NAME, if name was not recorded in * event on a directory, we will report the name ".". */ dot_len = 1; } if (fanotify_event_has_object_fh(event)) { fh_len = fanotify_event_object_fh_len(event); event_len += fanotify_fid_info_len(fh_len, dot_len); } if (fanotify_is_mnt_event(event->mask)) event_len += FANOTIFY_MNT_INFO_LEN; if (info_mode & FAN_REPORT_PIDFD) event_len += FANOTIFY_PIDFD_INFO_LEN; if (fanotify_event_has_access_range(event)) event_len += FANOTIFY_RANGE_INFO_LEN; return event_len; } /* * Remove an hashed event from merge hash table. */ static void fanotify_unhash_event(struct fsnotify_group *group, struct fanotify_event *event) { assert_spin_locked(&group->notification_lock); pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, group, event, fanotify_event_hash_bucket(group, event)); if (WARN_ON_ONCE(hlist_unhashed(&event->merge_list))) return; hlist_del_init(&event->merge_list); } /* * Get an fanotify notification event if one exists and is small * enough to fit in "count". Return an error pointer if the count * is not large enough. When permission event is dequeued, its state is * updated accordingly. 
*/ static struct fanotify_event *get_one_event(struct fsnotify_group *group, size_t count) { size_t event_size; struct fanotify_event *event = NULL; struct fsnotify_event *fsn_event; unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); pr_debug("%s: group=%p count=%zd\n", __func__, group, count); spin_lock(&group->notification_lock); fsn_event = fsnotify_peek_first_event(group); if (!fsn_event) goto out; event = FANOTIFY_E(fsn_event); event_size = fanotify_event_len(info_mode, event); if (event_size > count) { event = ERR_PTR(-EINVAL); goto out; } /* * Held the notification_lock the whole time, so this is the * same event we peeked above. */ fsnotify_remove_first_event(group); if (fanotify_is_perm_event(event->mask)) FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED; if (fanotify_is_hashed_event(event->mask)) fanotify_unhash_event(group, event); out: spin_unlock(&group->notification_lock); return event; } static int create_fd(struct fsnotify_group *group, const struct path *path, struct file **file) { int client_fd; struct file *new_file; client_fd = get_unused_fd_flags(group->fanotify_data.f_flags); if (client_fd < 0) return client_fd; /* * We provide an fd for the userspace program, so it could access the * file without generating fanotify events itself. */ new_file = dentry_open_nonotify(path, group->fanotify_data.f_flags, current_cred()); if (IS_ERR(new_file)) { put_unused_fd(client_fd); client_fd = PTR_ERR(new_file); } else { *file = new_file; } return client_fd; } static int process_access_response_info(const char __user *info, size_t info_len, struct fanotify_response_info_audit_rule *friar) { if (info_len != sizeof(*friar)) return -EINVAL; if (copy_from_user(friar, info, sizeof(*friar))) return -EFAULT; if (friar->hdr.type != FAN_RESPONSE_INFO_AUDIT_RULE) return -EINVAL; if (friar->hdr.pad != 0) return -EINVAL; if (friar->hdr.len != sizeof(*friar)) return -EINVAL; return info_len; } /* * Finish processing of permission event by setting it to ANSWERED state and * drop group->notification_lock. 
*/ static void finish_permission_event(struct fsnotify_group *group, struct fanotify_perm_event *event, u32 response, struct fanotify_response_info_audit_rule *friar) __releases(&group->notification_lock) { bool destroy = false; assert_spin_locked(&group->notification_lock); event->response = response & ~FAN_INFO; if (response & FAN_INFO) memcpy(&event->audit_rule, friar, sizeof(*friar)); if (event->state == FAN_EVENT_CANCELED) destroy = true; else event->state = FAN_EVENT_ANSWERED; spin_unlock(&group->notification_lock); if (destroy) fsnotify_destroy_event(group, &event->fae.fse); } static int process_access_response(struct fsnotify_group *group, struct fanotify_response *response_struct, const char __user *info, size_t info_len) { struct fanotify_perm_event *event; int fd = response_struct->fd; u32 response = response_struct->response; int errno = fanotify_get_response_errno(response); int ret = info_len; struct fanotify_response_info_audit_rule friar; pr_debug("%s: group=%p fd=%d response=%x errno=%d buf=%p size=%zu\n", __func__, group, fd, response, errno, info, info_len); /* * make sure the response is valid, if invalid we do nothing and either * userspace can send a valid response or we will clean it up after the * timeout */ if (response & ~FANOTIFY_RESPONSE_VALID_MASK) return -EINVAL; switch (response & FANOTIFY_RESPONSE_ACCESS) { case FAN_ALLOW: if (errno) return -EINVAL; break; case FAN_DENY: /* Custom errno is supported only for pre-content groups */ if (errno && group->priority != FSNOTIFY_PRIO_PRE_CONTENT) return -EINVAL; /* * Limit errno to values expected on open(2)/read(2)/write(2) * of regular files. */ switch (errno) { case 0: case EIO: case EPERM: case EBUSY: case ETXTBSY: case EAGAIN: case ENOSPC: case EDQUOT: break; default: return -EINVAL; } break; default: return -EINVAL; } if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT)) return -EINVAL; if (response & FAN_INFO) { ret = process_access_response_info(info, info_len, &friar); if (ret < 0) return ret; if (fd == FAN_NOFD) return ret; } else { ret = 0; } if (fd < 0) return -EINVAL; spin_lock(&group->notification_lock); list_for_each_entry(event, &group->fanotify_data.access_list, fae.fse.list) { if (event->fd != fd) continue; list_del_init(&event->fae.fse.list); finish_permission_event(group, event, response, &friar); wake_up(&group->fanotify_data.access_waitq); return ret; } spin_unlock(&group->notification_lock); return -ENOENT; } static size_t copy_mnt_info_to_user(struct fanotify_event *event, char __user *buf, int count) { struct fanotify_event_info_mnt info = { }; info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT; info.hdr.len = FANOTIFY_MNT_INFO_LEN; if (WARN_ON(count < info.hdr.len)) return -EFAULT; info.mnt_id = FANOTIFY_ME(event)->mnt_id; if (copy_to_user(buf, &info, sizeof(info))) return -EFAULT; return info.hdr.len; } static size_t copy_error_info_to_user(struct fanotify_event *event, char __user *buf, int count) { struct fanotify_event_info_error info = { }; struct fanotify_error_event *fee = FANOTIFY_EE(event); info.hdr.info_type = FAN_EVENT_INFO_TYPE_ERROR; info.hdr.len = FANOTIFY_ERROR_INFO_LEN; if (WARN_ON(count < info.hdr.len)) return -EFAULT; info.error = fee->error; info.error_count = fee->err_count; if (copy_to_user(buf, &info, sizeof(info))) return -EFAULT; return info.hdr.len; } static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, int info_type, const char *name, size_t name_len, char __user *buf, size_t count) { struct fanotify_event_info_fid info = { }; 
struct file_handle handle = { }; unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh_buf; size_t fh_len = fh ? fh->len : 0; size_t info_len = fanotify_fid_info_len(fh_len, name_len); size_t len = info_len; pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n", __func__, fh_len, name_len, info_len, count); if (WARN_ON_ONCE(len < sizeof(info) || len > count)) return -EFAULT; /* * Copy event info fid header followed by variable sized file handle * and optionally followed by variable sized filename. */ switch (info_type) { case FAN_EVENT_INFO_TYPE_FID: case FAN_EVENT_INFO_TYPE_DFID: if (WARN_ON_ONCE(name_len)) return -EFAULT; break; case FAN_EVENT_INFO_TYPE_DFID_NAME: case FAN_EVENT_INFO_TYPE_OLD_DFID_NAME: case FAN_EVENT_INFO_TYPE_NEW_DFID_NAME: if (WARN_ON_ONCE(!name || !name_len)) return -EFAULT; break; default: return -EFAULT; } info.hdr.info_type = info_type; info.hdr.len = len; info.fsid = *fsid; if (copy_to_user(buf, &info, sizeof(info))) return -EFAULT; buf += sizeof(info); len -= sizeof(info); if (WARN_ON_ONCE(len < sizeof(handle))) return -EFAULT; handle.handle_type = fh->type; handle.handle_bytes = fh_len; /* Mangle handle_type for bad file_handle */ if (!fh_len) handle.handle_type = FILEID_INVALID; if (copy_to_user(buf, &handle, sizeof(handle))) return -EFAULT; buf += sizeof(handle); len -= sizeof(handle); if (WARN_ON_ONCE(len < fh_len)) return -EFAULT; /* * For an inline fh and inline file name, copy through stack to exclude * the copy from usercopy hardening protections. */ fh_buf = fanotify_fh_buf(fh); if (fh_len <= FANOTIFY_INLINE_FH_LEN) { memcpy(bounce, fh_buf, fh_len); fh_buf = bounce; } if (copy_to_user(buf, fh_buf, fh_len)) return -EFAULT; buf += fh_len; len -= fh_len; if (name_len) { /* Copy the filename with terminating null */ name_len++; if (WARN_ON_ONCE(len < name_len)) return -EFAULT; if (copy_to_user(buf, name, name_len)) return -EFAULT; buf += name_len; len -= name_len; } /* Pad with 0's */ WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); if (len > 0 && clear_user(buf, len)) return -EFAULT; return info_len; } static int copy_pidfd_info_to_user(int pidfd, char __user *buf, size_t count) { struct fanotify_event_info_pidfd info = { }; size_t info_len = FANOTIFY_PIDFD_INFO_LEN; if (WARN_ON_ONCE(info_len > count)) return -EFAULT; info.hdr.info_type = FAN_EVENT_INFO_TYPE_PIDFD; info.hdr.len = info_len; info.pidfd = pidfd; if (copy_to_user(buf, &info, info_len)) return -EFAULT; return info_len; } static size_t copy_range_info_to_user(struct fanotify_event *event, char __user *buf, int count) { struct fanotify_perm_event *pevent = FANOTIFY_PERM(event); struct fanotify_event_info_range info = { }; size_t info_len = FANOTIFY_RANGE_INFO_LEN; if (WARN_ON_ONCE(info_len > count)) return -EFAULT; if (WARN_ON_ONCE(!pevent->ppos)) return -EINVAL; info.hdr.info_type = FAN_EVENT_INFO_TYPE_RANGE; info.hdr.len = info_len; info.offset = *(pevent->ppos); info.count = pevent->count; if (copy_to_user(buf, &info, info_len)) return -EFAULT; return info_len; } static int copy_info_records_to_user(struct fanotify_event *event, struct fanotify_info *info, unsigned int info_mode, int pidfd, char __user *buf, size_t count) { int ret, total_bytes = 0, info_type = 0; unsigned int fid_mode = info_mode & FANOTIFY_FID_BITS; unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD; /* * Event info records order is as follows: * 1. dir fid + name * 2. (optional) new dir fid + new name * 3. (optional) child fid */ if (fanotify_event_has_dir_fh(event)) { info_type = info->name_len ? 
FAN_EVENT_INFO_TYPE_DFID_NAME : FAN_EVENT_INFO_TYPE_DFID; /* FAN_RENAME uses special info types */ if (event->mask & FAN_RENAME) info_type = FAN_EVENT_INFO_TYPE_OLD_DFID_NAME; ret = copy_fid_info_to_user(fanotify_event_fsid(event), fanotify_info_dir_fh(info), info_type, fanotify_info_name(info), info->name_len, buf, count); if (ret < 0) return ret; buf += ret; count -= ret; total_bytes += ret; } /* New dir fid+name may be reported in addition to old dir fid+name */ if (fanotify_event_has_dir2_fh(event)) { info_type = FAN_EVENT_INFO_TYPE_NEW_DFID_NAME; ret = copy_fid_info_to_user(fanotify_event_fsid(event), fanotify_info_dir2_fh(info), info_type, fanotify_info_name2(info), info->name2_len, buf, count); if (ret < 0) return ret; buf += ret; count -= ret; total_bytes += ret; } if (fanotify_event_has_object_fh(event)) { const char *dot = NULL; int dot_len = 0; if (fid_mode == FAN_REPORT_FID || info_type) { /* * With only group flag FAN_REPORT_FID only type FID is * reported. Second info record type is always FID. */ info_type = FAN_EVENT_INFO_TYPE_FID; } else if ((fid_mode & FAN_REPORT_NAME) && (event->mask & FAN_ONDIR)) { /* * With group flag FAN_REPORT_NAME, if name was not * recorded in an event on a directory, report the name * "." with info type DFID_NAME. */ info_type = FAN_EVENT_INFO_TYPE_DFID_NAME; dot = "."; dot_len = 1; } else if ((event->mask & ALL_FSNOTIFY_DIRENT_EVENTS) || (event->mask & FAN_ONDIR)) { /* * With group flag FAN_REPORT_DIR_FID, a single info * record has type DFID for directory entry modification * event and for event on a directory. */ info_type = FAN_EVENT_INFO_TYPE_DFID; } else { /* * With group flags FAN_REPORT_DIR_FID|FAN_REPORT_FID, * a single info record has type FID for event on a * non-directory, when there is no directory to report. * For example, on FAN_DELETE_SELF event. 
*/ info_type = FAN_EVENT_INFO_TYPE_FID; } ret = copy_fid_info_to_user(fanotify_event_fsid(event), fanotify_event_object_fh(event), info_type, dot, dot_len, buf, count); if (ret < 0) return ret; buf += ret; count -= ret; total_bytes += ret; } if (pidfd_mode) { ret = copy_pidfd_info_to_user(pidfd, buf, count); if (ret < 0) return ret; buf += ret; count -= ret; total_bytes += ret; } if (fanotify_is_error_event(event->mask)) { ret = copy_error_info_to_user(event, buf, count); if (ret < 0) return ret; buf += ret; count -= ret; total_bytes += ret; } if (fanotify_event_has_access_range(event)) { ret = copy_range_info_to_user(event, buf, count); if (ret < 0) return ret; buf += ret; count -= ret; total_bytes += ret; } if (fanotify_is_mnt_event(event->mask)) { ret = copy_mnt_info_to_user(event, buf, count); if (ret < 0) return ret; buf += ret; count -= ret; total_bytes += ret; } return total_bytes; } static ssize_t copy_event_to_user(struct fsnotify_group *group, struct fanotify_event *event, char __user *buf, size_t count) { struct fanotify_event_metadata metadata; const struct path *path = fanotify_event_path(event); struct fanotify_info *info = fanotify_event_info(event); unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD; struct file *f = NULL, *pidfd_file = NULL; int ret, pidfd = -ESRCH, fd = -EBADF; pr_debug("%s: group=%p event=%p\n", __func__, group, event); metadata.event_len = fanotify_event_len(info_mode, event); metadata.metadata_len = FAN_EVENT_METADATA_LEN; metadata.vers = FANOTIFY_METADATA_VERSION; metadata.reserved = 0; metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; metadata.pid = pid_vnr(event->pid); /* * For an unprivileged listener, event->pid can be used to identify the * events generated by the listener process itself, without disclosing * the pids of other processes. */ if (FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && task_tgid(current) != event->pid) metadata.pid = 0; /* * For now, fid mode is required for an unprivileged listener and * fid mode does not report fd in events. Keep this check anyway * for safety in case fid mode requirement is relaxed in the future * to allow unprivileged listener to get events with no fd and no fid. */ if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && path && path->mnt && path->dentry) { fd = create_fd(group, path, &f); /* * Opening an fd from dentry can fail for several reasons. * For example, when tasks are gone and we try to open their * /proc files or we try to open a WRONLY file like in sysfs * or when trying to open a file that was deleted on the * remote network server. * * For a group with FAN_REPORT_FD_ERROR, we will send the * event with the error instead of the open fd, otherwise * Userspace may not get the error at all. * In any case, userspace will not know which file failed to * open, so add a debug print for further investigation. */ if (fd < 0) { pr_debug("fanotify: create_fd(%pd2) failed err=%d\n", path->dentry, fd); if (!FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR)) { /* * Historically, we've handled EOPENSTALE in a * special way and silently dropped such * events. Now we have to keep it to maintain * backward compatibility... */ if (fd == -EOPENSTALE) fd = 0; return fd; } } } if (FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR)) metadata.fd = fd; else metadata.fd = fd >= 0 ? fd : FAN_NOFD; if (pidfd_mode) { /* * Complain if the FAN_REPORT_PIDFD and FAN_REPORT_TID mutual * exclusion is ever lifted. 
At the time of incoporating pidfd * support within fanotify, the pidfd API only supported the * creation of pidfds for thread-group leaders. */ WARN_ON_ONCE(FAN_GROUP_FLAG(group, FAN_REPORT_TID)); /* * The PIDTYPE_TGID check for an event->pid is performed * preemptively in an attempt to catch out cases where the event * listener reads events after the event generating process has * already terminated. Depending on flag FAN_REPORT_FD_ERROR, * report either -ESRCH or FAN_NOPIDFD to the event listener in * those cases with all other pidfd creation errors reported as * the error code itself or as FAN_EPIDFD. */ if (metadata.pid && pid_has_task(event->pid, PIDTYPE_TGID)) pidfd = pidfd_prepare(event->pid, 0, &pidfd_file); if (!FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR) && pidfd < 0) pidfd = pidfd == -ESRCH ? FAN_NOPIDFD : FAN_EPIDFD; } ret = -EFAULT; /* * Sanity check copy size in case get_one_event() and * event_len sizes ever get out of sync. */ if (WARN_ON_ONCE(metadata.event_len > count)) goto out_close_fd; if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN)) goto out_close_fd; buf += FAN_EVENT_METADATA_LEN; count -= FAN_EVENT_METADATA_LEN; ret = copy_info_records_to_user(event, info, info_mode, pidfd, buf, count); if (ret < 0) goto out_close_fd; if (f) fd_install(fd, f); if (pidfd_file) fd_install(pidfd, pidfd_file); if (fanotify_is_perm_event(event->mask)) FANOTIFY_PERM(event)->fd = fd; return metadata.event_len; out_close_fd: if (f) { put_unused_fd(fd); fput(f); } if (pidfd_file) { put_unused_fd(pidfd); fput(pidfd_file); } return ret; } /* intofiy userspace file descriptor functions */ static __poll_t fanotify_poll(struct file *file, poll_table *wait) { struct fsnotify_group *group = file->private_data; __poll_t ret = 0; poll_wait(file, &group->notification_waitq, wait); spin_lock(&group->notification_lock); if (!fsnotify_notify_queue_is_empty(group)) ret = EPOLLIN | EPOLLRDNORM; spin_unlock(&group->notification_lock); return ret; } static ssize_t fanotify_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { struct fsnotify_group *group; struct fanotify_event *event; char __user *start; int ret; DEFINE_WAIT_FUNC(wait, woken_wake_function); start = buf; group = file->private_data; pr_debug("%s: group=%p\n", __func__, group); add_wait_queue(&group->notification_waitq, &wait); while (1) { /* * User can supply arbitrarily large buffer. Avoid softlockups * in case there are lots of available events. */ cond_resched(); event = get_one_event(group, count); if (IS_ERR(event)) { ret = PTR_ERR(event); break; } if (!event) { ret = -EAGAIN; if (file->f_flags & O_NONBLOCK) break; ret = -ERESTARTSYS; if (signal_pending(current)) break; if (start != buf) break; wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); continue; } ret = copy_event_to_user(group, event, buf, count); /* * Permission events get queued to wait for response. Other * events can be destroyed now. 
*/ if (!fanotify_is_perm_event(event->mask)) { fsnotify_destroy_event(group, &event->fse); } else { if (ret <= 0 || FANOTIFY_PERM(event)->fd < 0) { spin_lock(&group->notification_lock); finish_permission_event(group, FANOTIFY_PERM(event), FAN_DENY, NULL); wake_up(&group->fanotify_data.access_waitq); } else { spin_lock(&group->notification_lock); list_add_tail(&event->fse.list, &group->fanotify_data.access_list); spin_unlock(&group->notification_lock); } } if (ret < 0) break; buf += ret; count -= ret; } remove_wait_queue(&group->notification_waitq, &wait); if (start != buf && ret != -EFAULT) ret = buf - start; return ret; } static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { struct fanotify_response response; struct fsnotify_group *group; int ret; const char __user *info_buf = buf + sizeof(struct fanotify_response); size_t info_len; if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) return -EINVAL; group = file->private_data; pr_debug("%s: group=%p count=%zu\n", __func__, group, count); if (count < sizeof(response)) return -EINVAL; if (copy_from_user(&response, buf, sizeof(response))) return -EFAULT; info_len = count - sizeof(response); ret = process_access_response(group, &response, info_buf, info_len); if (ret < 0) count = ret; else count = sizeof(response) + ret; return count; } static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; struct fsnotify_event *fsn_event; /* * Stop new events from arriving in the notification queue. since * userspace cannot use fanotify fd anymore, no event can enter or * leave access_list by now either. */ fsnotify_group_stop_queueing(group); /* * Process all permission events on access_list and notification queue * and simulate reply from userspace. */ spin_lock(&group->notification_lock); while (!list_empty(&group->fanotify_data.access_list)) { struct fanotify_perm_event *event; event = list_first_entry(&group->fanotify_data.access_list, struct fanotify_perm_event, fae.fse.list); list_del_init(&event->fae.fse.list); finish_permission_event(group, event, FAN_ALLOW, NULL); spin_lock(&group->notification_lock); } /* * Destroy all non-permission events. For permission events just * dequeue them and set the response. They will be freed once the * response is consumed and fanotify_get_response() returns. 
*/ while ((fsn_event = fsnotify_remove_first_event(group))) { struct fanotify_event *event = FANOTIFY_E(fsn_event); if (!(event->mask & FANOTIFY_PERM_EVENTS)) { spin_unlock(&group->notification_lock); fsnotify_destroy_event(group, fsn_event); } else { finish_permission_event(group, FANOTIFY_PERM(event), FAN_ALLOW, NULL); } spin_lock(&group->notification_lock); } spin_unlock(&group->notification_lock); /* Response for all permission events it set, wakeup waiters */ wake_up(&group->fanotify_data.access_waitq); /* matches the fanotify_init->fsnotify_alloc_group */ fsnotify_destroy_group(group); return 0; } static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct fsnotify_group *group; struct fsnotify_event *fsn_event; void __user *p; int ret = -ENOTTY; size_t send_len = 0; group = file->private_data; p = (void __user *) arg; switch (cmd) { case FIONREAD: spin_lock(&group->notification_lock); list_for_each_entry(fsn_event, &group->notification_list, list) send_len += FAN_EVENT_METADATA_LEN; spin_unlock(&group->notification_lock); ret = put_user(send_len, (int __user *) p); break; } return ret; } static const struct file_operations fanotify_fops = { .show_fdinfo = fanotify_show_fdinfo, .poll = fanotify_poll, .read = fanotify_read, .write = fanotify_write, .fasync = NULL, .release = fanotify_release, .unlocked_ioctl = fanotify_ioctl, .compat_ioctl = compat_ptr_ioctl, .llseek = noop_llseek, }; static int fanotify_find_path(int dfd, const char __user *filename, struct path *path, unsigned int flags, __u64 mask, unsigned int obj_type) { int ret; pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__, dfd, filename, flags); if (filename == NULL) { CLASS(fd, f)(dfd); if (fd_empty(f)) return -EBADF; if ((flags & FAN_MARK_ONLYDIR) && !(S_ISDIR(file_inode(fd_file(f))->i_mode))) return -ENOTDIR; *path = fd_file(f)->f_path; path_get(path); } else { unsigned int lookup_flags = 0; if (!(flags & FAN_MARK_DONT_FOLLOW)) lookup_flags |= LOOKUP_FOLLOW; if (flags & FAN_MARK_ONLYDIR) lookup_flags |= LOOKUP_DIRECTORY; ret = user_path_at(dfd, filename, lookup_flags, path); if (ret) goto out; } /* you can only watch an inode if you have read permissions on it */ ret = path_permission(path, MAY_READ); if (ret) { path_put(path); goto out; } ret = security_path_notify(path, mask, obj_type); if (ret) path_put(path); out: return ret; } static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, __u32 mask, unsigned int flags, __u32 umask, int *destroy) { __u32 oldmask, newmask; /* umask bits cannot be removed by user */ mask &= ~umask; spin_lock(&fsn_mark->lock); oldmask = fsnotify_calc_mask(fsn_mark); if (!(flags & FANOTIFY_MARK_IGNORE_BITS)) { fsn_mark->mask &= ~mask; } else { fsn_mark->ignore_mask &= ~mask; } newmask = fsnotify_calc_mask(fsn_mark); /* * We need to keep the mark around even if remaining mask cannot * result in any events (e.g. mask == FAN_ONDIR) to support incremenal * changes to the mask. * Destroy mark when only umask bits remain. 
*/ *destroy = !((fsn_mark->mask | fsn_mark->ignore_mask) & ~umask); spin_unlock(&fsn_mark->lock); return oldmask & ~newmask; } static int fanotify_remove_mark(struct fsnotify_group *group, void *obj, unsigned int obj_type, __u32 mask, unsigned int flags, __u32 umask) { struct fsnotify_mark *fsn_mark = NULL; __u32 removed; int destroy_mark; fsnotify_group_lock(group); fsn_mark = fsnotify_find_mark(obj, obj_type, group); if (!fsn_mark) { fsnotify_group_unlock(group); return -ENOENT; } removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, umask, &destroy_mark); if (removed & fsnotify_conn_mask(fsn_mark->connector)) fsnotify_recalc_mask(fsn_mark->connector); if (destroy_mark) fsnotify_detach_mark(fsn_mark); fsnotify_group_unlock(group); if (destroy_mark) fsnotify_free_mark(fsn_mark); /* matches the fsnotify_find_mark() */ fsnotify_put_mark(fsn_mark); return 0; } static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark, unsigned int fan_flags) { bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE); unsigned int ignore = fan_flags & FANOTIFY_MARK_IGNORE_BITS; bool recalc = false; /* * When using FAN_MARK_IGNORE for the first time, mark starts using * independent event flags in ignore mask. After that, trying to * update the ignore mask with the old FAN_MARK_IGNORED_MASK API * will result in EEXIST error. */ if (ignore == FAN_MARK_IGNORE) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS; /* * Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to * the removal of the FS_MODIFY bit in calculated mask if it was set * because of an ignore mask that is now going to survive FS_MODIFY. */ if (ignore && (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) && !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) { fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; if (!(fsn_mark->mask & FS_MODIFY)) recalc = true; } if (fsn_mark->connector->type != FSNOTIFY_OBJ_TYPE_INODE || want_iref == !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) return recalc; /* * NO_IREF may be removed from a mark, but not added. * When removed, fsnotify_recalc_mask() will take the inode ref. */ WARN_ON_ONCE(!want_iref); fsn_mark->flags &= ~FSNOTIFY_MARK_FLAG_NO_IREF; return true; } static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, __u32 mask, unsigned int fan_flags) { bool recalc; spin_lock(&fsn_mark->lock); if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS)) fsn_mark->mask |= mask; else fsn_mark->ignore_mask |= mask; recalc = fsnotify_calc_mask(fsn_mark) & ~fsnotify_conn_mask(fsn_mark->connector); recalc |= fanotify_mark_update_flags(fsn_mark, fan_flags); spin_unlock(&fsn_mark->lock); return recalc; } struct fan_fsid { struct super_block *sb; __kernel_fsid_t id; bool weak; }; static int fanotify_set_mark_fsid(struct fsnotify_group *group, struct fsnotify_mark *mark, struct fan_fsid *fsid) { struct fsnotify_mark_connector *conn; struct fsnotify_mark *old; struct super_block *old_sb = NULL; FANOTIFY_MARK(mark)->fsid = fsid->id; mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID; if (fsid->weak) mark->flags |= FSNOTIFY_MARK_FLAG_WEAK_FSID; /* First mark added will determine if group is single or multi fsid */ if (list_empty(&group->marks_list)) return 0; /* Find sb of an existing mark */ list_for_each_entry(old, &group->marks_list, g_list) { conn = READ_ONCE(old->connector); if (!conn) continue; old_sb = fsnotify_connector_sb(conn); if (old_sb) break; } /* Only detached marks left? 
*/ if (!old_sb) return 0; /* Do not allow mixing of marks with weak and strong fsid */ if ((mark->flags ^ old->flags) & FSNOTIFY_MARK_FLAG_WEAK_FSID) return -EXDEV; /* Allow mixing of marks with strong fsid from different fs */ if (!fsid->weak) return 0; /* Do not allow mixing marks with weak fsid from different fs */ if (old_sb != fsid->sb) return -EXDEV; /* Do not allow mixing marks from different btrfs sub-volumes */ if (!fanotify_fsid_equal(&FANOTIFY_MARK(old)->fsid, &FANOTIFY_MARK(mark)->fsid)) return -EXDEV; return 0; } static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, void *obj, unsigned int obj_type, unsigned int fan_flags, struct fan_fsid *fsid) { struct ucounts *ucounts = group->fanotify_data.ucounts; struct fanotify_mark *fan_mark; struct fsnotify_mark *mark; int ret; /* * Enforce per user marks limits per user in all containing user ns. * A group with FAN_UNLIMITED_MARKS does not contribute to mark count * in the limited groups account. */ BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_MARKS)); if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) && !inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS)) return ERR_PTR(-ENOSPC); fan_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); if (!fan_mark) { ret = -ENOMEM; goto out_dec_ucounts; } mark = &fan_mark->fsn_mark; fsnotify_init_mark(mark, group); if (fan_flags & FAN_MARK_EVICTABLE) mark->flags |= FSNOTIFY_MARK_FLAG_NO_IREF; /* Cache fsid of filesystem containing the marked object */ if (fsid) { ret = fanotify_set_mark_fsid(group, mark, fsid); if (ret) goto out_put_mark; } else { fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0; } ret = fsnotify_add_mark_locked(mark, obj, obj_type, 0); if (ret) goto out_put_mark; return mark; out_put_mark: fsnotify_put_mark(mark); out_dec_ucounts: if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS)) dec_ucount(ucounts, UCOUNT_FANOTIFY_MARKS); return ERR_PTR(ret); } static int fanotify_group_init_error_pool(struct fsnotify_group *group) { if (mempool_initialized(&group->fanotify_data.error_events_pool)) return 0; return mempool_init_kmalloc_pool(&group->fanotify_data.error_events_pool, FANOTIFY_DEFAULT_FEE_POOL_SIZE, sizeof(struct fanotify_error_event)); } static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, __u32 mask, unsigned int fan_flags) { /* * Non evictable mark cannot be downgraded to evictable mark. */ if (fan_flags & FAN_MARK_EVICTABLE && !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) return -EEXIST; /* * New ignore mask semantics cannot be downgraded to old semantics. */ if (fan_flags & FAN_MARK_IGNORED_MASK && fsn_mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS) return -EEXIST; /* * An ignore mask that survives modify could never be downgraded to not * survive modify. With new FAN_MARK_IGNORE semantics we make that rule * explicit and return an error when trying to update the ignore mask * without the original FAN_MARK_IGNORED_SURV_MODIFY value. 
*/ if (fan_flags & FAN_MARK_IGNORE && !(fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) && fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) return -EEXIST; /* For now pre-content events are not generated for directories */ mask |= fsn_mark->mask; if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR) return -EEXIST; return 0; } static int fanotify_add_mark(struct fsnotify_group *group, void *obj, unsigned int obj_type, __u32 mask, unsigned int fan_flags, struct fan_fsid *fsid) { struct fsnotify_mark *fsn_mark; bool recalc; int ret = 0; fsnotify_group_lock(group); fsn_mark = fsnotify_find_mark(obj, obj_type, group); if (!fsn_mark) { fsn_mark = fanotify_add_new_mark(group, obj, obj_type, fan_flags, fsid); if (IS_ERR(fsn_mark)) { fsnotify_group_unlock(group); return PTR_ERR(fsn_mark); } } /* * Check if requested mark flags conflict with an existing mark flags. */ ret = fanotify_may_update_existing_mark(fsn_mark, mask, fan_flags); if (ret) goto out; /* * Error events are pre-allocated per group, only if strictly * needed (i.e. FAN_FS_ERROR was requested). */ if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS) && (mask & FAN_FS_ERROR)) { ret = fanotify_group_init_error_pool(group); if (ret) goto out; } recalc = fanotify_mark_add_to_mask(fsn_mark, mask, fan_flags); if (recalc) fsnotify_recalc_mask(fsn_mark->connector); out: fsnotify_group_unlock(group); fsnotify_put_mark(fsn_mark); return ret; } static struct fsnotify_event *fanotify_alloc_overflow_event(void) { struct fanotify_event *oevent; oevent = kmalloc(sizeof(*oevent), GFP_KERNEL_ACCOUNT); if (!oevent) return NULL; fanotify_init_event(oevent, 0, FS_Q_OVERFLOW); oevent->type = FANOTIFY_EVENT_TYPE_OVERFLOW; return &oevent->fse; } static struct hlist_head *fanotify_alloc_merge_hash(void) { struct hlist_head *hash; hash = kmalloc(sizeof(struct hlist_head) << FANOTIFY_HTABLE_BITS, GFP_KERNEL_ACCOUNT); if (!hash) return NULL; __hash_init(hash, FANOTIFY_HTABLE_SIZE); return hash; } /* fanotify syscalls */ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) { struct user_namespace *user_ns = current_user_ns(); struct fsnotify_group *group; int f_flags, fd; unsigned int fid_mode = flags & FANOTIFY_FID_BITS; unsigned int class = flags & FANOTIFY_CLASS_BITS; unsigned int internal_flags = 0; struct file *file; pr_debug("%s: flags=%x event_f_flags=%x\n", __func__, flags, event_f_flags); if (!capable(CAP_SYS_ADMIN)) { /* * An unprivileged user can setup an fanotify group with * limited functionality - an unprivileged group is limited to * notification events with file handles or mount ids and it * cannot use unlimited queue/marks. */ if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !(flags & (FANOTIFY_FID_BITS | FAN_REPORT_MNT))) return -EPERM; /* * Setting the internal flag FANOTIFY_UNPRIV on the group * prevents setting mount/filesystem marks on this group and * prevents reporting pid and open fd in events. */ internal_flags |= FANOTIFY_UNPRIV; } #ifdef CONFIG_AUDITSYSCALL if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT)) #else if (flags & ~FANOTIFY_INIT_FLAGS) #endif return -EINVAL; /* * A pidfd can only be returned for a thread-group leader; thus * FAN_REPORT_PIDFD and FAN_REPORT_TID need to remain mutually * exclusive. 
*/ if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID)) return -EINVAL; /* Don't allow mixing mnt events with inode events for now */ if (flags & FAN_REPORT_MNT) { if (class != FAN_CLASS_NOTIF) return -EINVAL; if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR)) return -EINVAL; } if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS) return -EINVAL; switch (event_f_flags & O_ACCMODE) { case O_RDONLY: case O_RDWR: case O_WRONLY: break; default: return -EINVAL; } if (fid_mode && class != FAN_CLASS_NOTIF) return -EINVAL; /* * Child name is reported with parent fid so requires dir fid. * We can report both child fid and dir fid with or without name. */ if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID)) return -EINVAL; /* * FAN_REPORT_TARGET_FID requires FAN_REPORT_NAME and FAN_REPORT_FID * and is used as an indication to report both dir and child fid on all * dirent events. */ if ((fid_mode & FAN_REPORT_TARGET_FID) && (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) return -EINVAL; f_flags = O_RDWR; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; if (flags & FAN_NONBLOCK) f_flags |= O_NONBLOCK; /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ group = fsnotify_alloc_group(&fanotify_fsnotify_ops, FSNOTIFY_GROUP_USER); if (IS_ERR(group)) { return PTR_ERR(group); } /* Enforce groups limits per user in all containing user ns */ group->fanotify_data.ucounts = inc_ucount(user_ns, current_euid(), UCOUNT_FANOTIFY_GROUPS); if (!group->fanotify_data.ucounts) { fd = -EMFILE; goto out_destroy_group; } group->fanotify_data.flags = flags | internal_flags; group->memcg = get_mem_cgroup_from_mm(current->mm); group->user_ns = get_user_ns(user_ns); group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); if (!group->fanotify_data.merge_hash) { fd = -ENOMEM; goto out_destroy_group; } group->overflow_event = fanotify_alloc_overflow_event(); if (unlikely(!group->overflow_event)) { fd = -ENOMEM; goto out_destroy_group; } if (force_o_largefile()) event_f_flags |= O_LARGEFILE; group->fanotify_data.f_flags = event_f_flags; init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); switch (class) { case FAN_CLASS_NOTIF: group->priority = FSNOTIFY_PRIO_NORMAL; break; case FAN_CLASS_CONTENT: group->priority = FSNOTIFY_PRIO_CONTENT; break; case FAN_CLASS_PRE_CONTENT: group->priority = FSNOTIFY_PRIO_PRE_CONTENT; break; default: fd = -EINVAL; goto out_destroy_group; } BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE)); if (flags & FAN_UNLIMITED_QUEUE) { group->max_events = UINT_MAX; } else { group->max_events = fanotify_max_queued_events; } if (flags & FAN_ENABLE_AUDIT) { fd = -EPERM; if (!capable(CAP_AUDIT_WRITE)) goto out_destroy_group; } fd = get_unused_fd_flags(f_flags); if (fd < 0) goto out_destroy_group; file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group, f_flags, FMODE_NONOTIFY); if (IS_ERR(file)) { put_unused_fd(fd); fd = PTR_ERR(file); goto out_destroy_group; } fd_install(fd, file); return fd; out_destroy_group: fsnotify_destroy_group(group); return fd; } static int fanotify_test_fsid(struct dentry *dentry, unsigned int flags, struct fan_fsid *fsid) { unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; __kernel_fsid_t root_fsid; int err; /* * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse). 
*/ err = vfs_get_fsid(dentry, &fsid->id); if (err) return err; fsid->sb = dentry->d_sb; if (!fsid->id.val[0] && !fsid->id.val[1]) { err = -ENODEV; goto weak; } /* * Make sure dentry is not of a filesystem subvolume (e.g. btrfs) * which uses a different fsid than sb root. */ err = vfs_get_fsid(dentry->d_sb->s_root, &root_fsid); if (err) return err; if (!fanotify_fsid_equal(&root_fsid, &fsid->id)) { err = -EXDEV; goto weak; } fsid->weak = false; return 0; weak: /* Allow weak fsid when marking inodes */ fsid->weak = true; return (mark_type == FAN_MARK_INODE) ? 0 : err; } /* Check if filesystem can encode a unique fid */ static int fanotify_test_fid(struct dentry *dentry, unsigned int flags) { unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; const struct export_operations *nop = dentry->d_sb->s_export_op; /* * We need to make sure that the filesystem supports encoding of * file handles so user can use name_to_handle_at() to compare fids * reported with events to the file handle of watched objects. */ if (!exportfs_can_encode_fid(nop)) return -EOPNOTSUPP; /* * For sb/mount mark, we also need to make sure that the filesystem * supports decoding file handles, so user has a way to map back the * reported fids to filesystem objects. */ if (mark_type != FAN_MARK_INODE && !exportfs_can_decode_fh(nop)) return -EOPNOTSUPP; return 0; } static int fanotify_events_supported(struct fsnotify_group *group, const struct path *path, __u64 mask, unsigned int flags) { unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; bool is_dir = d_is_dir(path->dentry); /* Strict validation of events in non-dir inode mask with v5.17+ APIs */ bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) || (mask & FAN_RENAME) || (flags & FAN_MARK_IGNORE); /* * Filesystems need to opt-into pre-content evnets (a.k.a HSM) * and they are only supported on regular files and directories. */ if (mask & FANOTIFY_PRE_CONTENT_EVENTS) { if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM)) return -EOPNOTSUPP; if (!is_dir && !d_is_reg(path->dentry)) return -EINVAL; } /* * Some filesystems such as 'proc' acquire unusual locks when opening * files. For them fanotify permission events have high chances of * deadlocking the system - open done when reporting fanotify event * blocks on this "unusual" lock while another process holding the lock * waits for fanotify permission event to be answered. Just disallow * permission events for such filesystems. */ if (mask & FANOTIFY_PERM_EVENTS && path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM) return -EINVAL; /* * mount and sb marks are not allowed on kernel internal pseudo fs, * like pipe_mnt, because that would subscribe to events on all the * anonynous pipes in the system. * * SB_NOUSER covers all of the internal pseudo fs whose objects are not * exposed to user's mount namespace, but there are other SB_KERNMOUNT * fs, like nsfs, debugfs, for which the value of allowing sb and mount * mark is questionable. For now we leave them alone. */ if (mark_type != FAN_MARK_INODE && path->mnt->mnt_sb->s_flags & SB_NOUSER) return -EINVAL; /* * We shouldn't have allowed setting dirent events and the directory * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode, * but because we always allowed it, error only when using new APIs. 
*/ if (strict_dir_events && mark_type == FAN_MARK_INODE && !is_dir && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) return -ENOTDIR; return 0; } static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, int dfd, const char __user *pathname) { struct inode *inode = NULL; struct fsnotify_group *group; struct path path; struct fan_fsid __fsid, *fsid = NULL; struct user_namespace *user_ns = NULL; struct mnt_namespace *mntns; u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS; unsigned int obj_type, fid_mode; void *obj = NULL; u32 umask = 0; int ret; pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", __func__, fanotify_fd, flags, dfd, pathname, mask); /* we only use the lower 32 bits as of right now. */ if (upper_32_bits(mask)) return -EINVAL; if (flags & ~FANOTIFY_MARK_FLAGS) return -EINVAL; switch (mark_type) { case FAN_MARK_INODE: obj_type = FSNOTIFY_OBJ_TYPE_INODE; break; case FAN_MARK_MOUNT: obj_type = FSNOTIFY_OBJ_TYPE_VFSMOUNT; break; case FAN_MARK_FILESYSTEM: obj_type = FSNOTIFY_OBJ_TYPE_SB; break; case FAN_MARK_MNTNS: obj_type = FSNOTIFY_OBJ_TYPE_MNTNS; break; default: return -EINVAL; } switch (mark_cmd) { case FAN_MARK_ADD: case FAN_MARK_REMOVE: if (!mask) return -EINVAL; break; case FAN_MARK_FLUSH: if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH)) return -EINVAL; break; default: return -EINVAL; } if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) valid_mask |= FANOTIFY_PERM_EVENTS; if (mask & ~valid_mask) return -EINVAL; /* We don't allow FAN_MARK_IGNORE & FAN_MARK_IGNORED_MASK together */ if (ignore == (FAN_MARK_IGNORE | FAN_MARK_IGNORED_MASK)) return -EINVAL; /* * Event flags (FAN_ONDIR, FAN_EVENT_ON_CHILD) have no effect with * FAN_MARK_IGNORED_MASK. */ if (ignore == FAN_MARK_IGNORED_MASK) { mask &= ~FANOTIFY_EVENT_FLAGS; umask = FANOTIFY_EVENT_FLAGS; } CLASS(fd, f)(fanotify_fd); if (fd_empty(f)) return -EBADF; /* verify that this is indeed an fanotify instance */ if (unlikely(fd_file(f)->f_op != &fanotify_fops)) return -EINVAL; group = fd_file(f)->private_data; /* Only report mount events on mnt namespace */ if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) { if (mask & ~FANOTIFY_MOUNT_EVENTS) return -EINVAL; if (mark_type != FAN_MARK_MNTNS) return -EINVAL; } else { if (mask & FANOTIFY_MOUNT_EVENTS) return -EINVAL; if (mark_type == FAN_MARK_MNTNS) return -EINVAL; } /* * A user is allowed to setup sb/mount/mntns marks only if it is * capable in the user ns where the group was created. */ if (!ns_capable(group->user_ns, CAP_SYS_ADMIN) && mark_type != FAN_MARK_INODE) return -EPERM; /* * Permission events are not allowed for FAN_CLASS_NOTIF. * Pre-content permission events are not allowed for FAN_CLASS_CONTENT. */ if (mask & FANOTIFY_PERM_EVENTS && group->priority == FSNOTIFY_PRIO_NORMAL) return -EINVAL; else if (mask & FANOTIFY_PRE_CONTENT_EVENTS && group->priority == FSNOTIFY_PRIO_CONTENT) return -EINVAL; if (mask & FAN_FS_ERROR && mark_type != FAN_MARK_FILESYSTEM) return -EINVAL; /* * Evictable is only relevant for inode marks, because only inode object * can be evicted on memory pressure. */ if (flags & FAN_MARK_EVICTABLE && mark_type != FAN_MARK_INODE) return -EINVAL; /* * Events that do not carry enough information to report * event->fd require a group that supports reporting fid. 
Those * events are not supported on a mount mark, because they do not * carry enough information (i.e. path) to be filtered by mount * point. */ fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) && (!fid_mode || mark_type == FAN_MARK_MOUNT)) return -EINVAL; /* * FAN_RENAME uses special info type records to report the old and * new parent+name. Reporting only old and new parent id is less * useful and was not implemented. */ if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME)) return -EINVAL; /* Pre-content events are not currently generated for directories. */ if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR) return -EINVAL; if (mark_cmd == FAN_MARK_FLUSH) { fsnotify_clear_marks_by_group(group, obj_type); return 0; } ret = fanotify_find_path(dfd, pathname, &path, flags, (mask & ALL_FSNOTIFY_EVENTS), obj_type); if (ret) return ret; if (mark_cmd == FAN_MARK_ADD) { ret = fanotify_events_supported(group, &path, mask, flags); if (ret) goto path_put_and_out; } if (fid_mode) { ret = fanotify_test_fsid(path.dentry, flags, &__fsid); if (ret) goto path_put_and_out; ret = fanotify_test_fid(path.dentry, flags); if (ret) goto path_put_and_out; fsid = &__fsid; } /* * In addition to being capable in the user ns where group was created, * the user also needs to be capable in the user ns associated with * the filesystem or in the user ns associated with the mntns * (when marking mntns). */ if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) { inode = path.dentry->d_inode; obj = inode; } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { user_ns = path.mnt->mnt_sb->s_user_ns; obj = path.mnt; } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) { user_ns = path.mnt->mnt_sb->s_user_ns; obj = path.mnt->mnt_sb; } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) { mntns = mnt_ns_from_dentry(path.dentry); user_ns = mntns->user_ns; obj = mntns; } ret = -EPERM; if (user_ns && !ns_capable(user_ns, CAP_SYS_ADMIN)) goto path_put_and_out; ret = -EINVAL; if (!obj) goto path_put_and_out; /* * If some other task has this inode open for write we should not add * an ignore mask, unless that ignore mask is supposed to survive * modification changes anyway. */ if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) && !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) { ret = !inode ? -EINVAL : -EISDIR; /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ if (ignore == FAN_MARK_IGNORE && (!inode || S_ISDIR(inode->i_mode))) goto path_put_and_out; ret = 0; if (inode && inode_is_open_for_write(inode)) goto path_put_and_out; } /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ if (!inode || !S_ISDIR(inode->i_mode)) { mask &= ~FAN_EVENT_ON_CHILD; umask = FAN_EVENT_ON_CHILD; /* * If group needs to report parent fid, register for getting * events with parent/name info for non-directory. 
*/ if ((fid_mode & FAN_REPORT_DIR_FID) && (flags & FAN_MARK_ADD) && !ignore) mask |= FAN_EVENT_ON_CHILD; } /* create/update an inode mark */ switch (mark_cmd) { case FAN_MARK_ADD: ret = fanotify_add_mark(group, obj, obj_type, mask, flags, fsid); break; case FAN_MARK_REMOVE: ret = fanotify_remove_mark(group, obj, obj_type, mask, flags, umask); break; default: ret = -EINVAL; } path_put_and_out: path_put(&path); return ret; } #ifndef CONFIG_ARCH_SPLIT_ARG64 SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, __u64, mask, int, dfd, const char __user *, pathname) { return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname); } #endif #if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT) SYSCALL32_DEFINE6(fanotify_mark, int, fanotify_fd, unsigned int, flags, SC_ARG64(mask), int, dfd, const char __user *, pathname) { return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask), dfd, pathname); } #endif /* * fanotify_user_setup - Our initialization function. Note that we cannot return * error because we have compiled-in VFS hooks. So an (unlikely) failure here * must result in panic(). */ static int __init fanotify_user_setup(void) { struct sysinfo si; int max_marks; si_meminfo(&si); /* * Allow up to 1% of addressable memory to be accounted for per user * marks limited to the range [8192, 1048576]. mount and sb marks are * a lot cheaper than inode marks, but there is no reason for a user * to have many of those, so calculate by the cost of inode marks. */ max_marks = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) / INODE_MARK_COST; max_marks = clamp(max_marks, FANOTIFY_OLD_DEFAULT_MAX_MARKS, FANOTIFY_DEFAULT_MAX_USER_MARKS); BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11); fanotify_mark_cache = KMEM_CACHE(fanotify_mark, SLAB_PANIC|SLAB_ACCOUNT); fanotify_fid_event_cachep = KMEM_CACHE(fanotify_fid_event, SLAB_PANIC); fanotify_path_event_cachep = KMEM_CACHE(fanotify_path_event, SLAB_PANIC); if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { fanotify_perm_event_cachep = KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); } fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC); fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS; init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] = FANOTIFY_DEFAULT_MAX_GROUPS; init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS] = max_marks; fanotify_sysctls_init(); return 0; } device_initcall(fanotify_user_setup); |
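/*
 * Illustrative sketch, not part of fanotify_user.c: a minimal userspace
 * program exercising the flag combinations validated above.  The group uses
 * FAN_CLASS_NOTIF because fid reporting is rejected for the content classes,
 * and FAN_REPORT_NAME is paired with FAN_REPORT_DIR_FID as required.  The
 * watched path "/tmp" is an arbitrary example; a filesystem mark also needs
 * CAP_SYS_ADMIN.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/fanotify.h>

int main(void)
{
	int fd;

	/* Notification-only group reporting directory fid + name. */
	fd = fanotify_init(FAN_CLASS_NOTIF | FAN_CLOEXEC |
			   FAN_REPORT_DIR_FID | FAN_REPORT_NAME, O_RDONLY);
	if (fd < 0) {
		perror("fanotify_init");
		return EXIT_FAILURE;
	}

	/*
	 * Directory entry events need a fid-reporting group and may not be
	 * requested on a mount mark, so mark the whole filesystem instead.
	 */
	if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
			  FAN_CREATE | FAN_DELETE | FAN_ONDIR,
			  AT_FDCWD, "/tmp") < 0) {
		perror("fanotify_mark");
		return EXIT_FAILURE;
	}

	printf("watching /tmp for create/delete events\n");
	return EXIT_SUCCESS;
}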
| 6 6 5 4 3 6 2 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/unaligned.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> struct nft_byteorder { u8 sreg; u8 dreg; enum nft_byteorder_ops op:8; u8 len; u8 size; }; void nft_byteorder_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_byteorder *priv = nft_expr_priv(expr); u32 *src = ®s->data[priv->sreg]; u32 *dst = ®s->data[priv->dreg]; u16 *s16, *d16; unsigned int i; s16 = (void *)src; d16 = (void *)dst; switch (priv->size) { case 8: { u64 *dst64 = (void *)dst; u64 src64; switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 8; i++) { src64 = nft_reg_load64(&src[i]); nft_reg_store64(&dst64[i], be64_to_cpu((__force __be64)src64)); } break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 8; i++) { src64 = (__force __u64) cpu_to_be64(nft_reg_load64(&src[i])); nft_reg_store64(&dst64[i], src64); } break; } break; } case 4: switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 4; i++) dst[i] = ntohl((__force __be32)src[i]); break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 4; i++) dst[i] = (__force __u32)htonl(src[i]); break; } break; case 2: switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 2; i++) d16[i] = ntohs((__force __be16)s16[i]); break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 2; i++) d16[i] = (__force __u16)htons(s16[i]); break; } break; } } static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = { [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 }, [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 }, [NFTA_BYTEORDER_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_BYTEORDER_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_BYTEORDER_SIZE] = NLA_POLICY_MAX(NLA_BE32, 255), }; static int nft_byteorder_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_byteorder *priv = nft_expr_priv(expr); u32 size, len; int err; if (tb[NFTA_BYTEORDER_SREG] == NULL || tb[NFTA_BYTEORDER_DREG] == NULL || tb[NFTA_BYTEORDER_LEN] == NULL || tb[NFTA_BYTEORDER_SIZE] == NULL || tb[NFTA_BYTEORDER_OP] == NULL) return -EINVAL; priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP])); switch (priv->op) { case NFT_BYTEORDER_NTOH: case NFT_BYTEORDER_HTON: break; default: return -EINVAL; } err = nft_parse_u32_check(tb[NFTA_BYTEORDER_SIZE], U8_MAX, &size); if (err < 0) return err; priv->size = size; switch (priv->size) { case 2: case 4: case 8: break; 
default: return -EINVAL; } err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len); if (err < 0) return err; priv->len = len; err = nft_parse_register_load(ctx, tb[NFTA_BYTEORDER_SREG], &priv->sreg, priv->len); if (err < 0) return err; return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, priv->len); } static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_byteorder *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_BYTEORDER_SREG, priv->sreg)) goto nla_put_failure; if (nft_dump_register(skb, NFTA_BYTEORDER_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static bool nft_byteorder_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { struct nft_byteorder *priv = nft_expr_priv(expr); nft_reg_track_cancel(track, priv->dreg, priv->len); return false; } static const struct nft_expr_ops nft_byteorder_ops = { .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), .eval = nft_byteorder_eval, .init = nft_byteorder_init, .dump = nft_byteorder_dump, .reduce = nft_byteorder_reduce, }; struct nft_expr_type nft_byteorder_type __read_mostly = { .name = "byteorder", .ops = &nft_byteorder_ops, .policy = nft_byteorder_policy, .maxattr = NFTA_BYTEORDER_MAX, .owner = THIS_MODULE, }; |
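/*
 * Illustrative sketch, not part of nft_byteorder.c: a stand-alone userspace
 * analogue of the NFT_BYTEORDER_NTOH / size == 4 case handled by
 * nft_byteorder_eval() above, i.e. len/4 network-order words converted to
 * host order in a register-like buffer.  The helper name and the values are
 * arbitrary.
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

static void byteorder_ntoh32(uint32_t *dst, const uint32_t *src,
			     unsigned int len)
{
	unsigned int i;

	/* Mirrors the 4-byte NTOH loop in nft_byteorder_eval(). */
	for (i = 0; i < len / 4; i++)
		dst[i] = ntohl(src[i]);
}

int main(void)
{
	uint32_t src[2] = { htonl(0x11223344), htonl(0xaabbccdd) };
	uint32_t dst[2];

	byteorder_ntoh32(dst, src, sizeof(src));
	printf("0x%08x 0x%08x\n", dst[0], dst[1]);
	return 0;
}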
| 3 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* mpi-inline.h - Internal to the Multi Precision Integers * Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc. * * This file is part of GnuPG. * * Note: This code is heavily based on the GNU MP Library. * Actually it's the same code with only minor changes in the * way the data is stored; this is to support the abstraction * of an optional secure memory allocation which may be used * to avoid revealing of sensitive data due to paging etc. * The GNU MP Library itself is published under the LGPL; * however I decided to publish this code under the plain GPL. */ #ifndef G10_MPI_INLINE_H #define G10_MPI_INLINE_H #ifndef G10_MPI_INLINE_DECL #define G10_MPI_INLINE_DECL static inline #endif G10_MPI_INLINE_DECL mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb) { mpi_limb_t x; x = *s1_ptr++; s2_limb += x; *res_ptr++ = s2_limb; if (s2_limb < x) { /* sum is less than the left operand: handle carry */ while (--s1_size) { x = *s1_ptr++ + 1; /* add carry */ *res_ptr++ = x; /* and store */ if (x) /* not 0 (no overflow): we can stop */ goto leave; } return 1; /* return carry (size of s1 to small) */ } leave: if (res_ptr != s1_ptr) { /* not the same variable */ mpi_size_t i; /* copy the rest */ for (i = 0; i < s1_size - 1; i++) res_ptr[i] = s1_ptr[i]; } return 0; /* no carry */ } G10_MPI_INLINE_DECL mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size) { mpi_limb_t cy = 0; if (s2_size) cy = mpihelp_add_n(res_ptr, s1_ptr, s2_ptr, s2_size); if (s1_size - s2_size) cy = mpihelp_add_1(res_ptr + s2_size, s1_ptr + s2_size, s1_size - s2_size, cy); return cy; } G10_MPI_INLINE_DECL mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb) { mpi_limb_t x; x = *s1_ptr++; s2_limb = x - s2_limb; *res_ptr++ = s2_limb; if (s2_limb > x) { while (--s1_size) { x = *s1_ptr++; *res_ptr++ = x - 1; if (x) goto leave; } return 1; } leave: if (res_ptr != s1_ptr) { mpi_size_t i; for (i = 0; i < s1_size - 1; i++) res_ptr[i] = s1_ptr[i]; } return 0; } G10_MPI_INLINE_DECL mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size) { mpi_limb_t cy = 0; if (s2_size) cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size); if (s1_size - s2_size) cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size, s1_size - s2_size, cy); return cy; } #endif /*G10_MPI_INLINE_H */ |
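/*
 * Illustrative sketch, not part of mpi-inline.h: a self-contained userspace
 * analogue of the carry propagation performed by mpihelp_add_1() above,
 * using uint32_t limbs stored least-significant first.  The helper name and
 * the 3-limb value are arbitrary.
 */
#include <stdint.h>
#include <stdio.h>

/* Adds s2 to the nlimbs-limb number s1; returns the final carry. */
static uint32_t limb_add_1(uint32_t *res, const uint32_t *s1,
			   unsigned int nlimbs, uint32_t s2)
{
	unsigned int i;

	for (i = 0; i < nlimbs; i++) {
		uint32_t sum = s1[i] + s2;

		res[i] = sum;
		/* Unsigned wrap-around means a carry into the next limb. */
		s2 = (sum < s1[i]) ? 1 : 0;
	}
	return s2;
}

int main(void)
{
	/* 0x00000002_ffffffff_ffffffff + 1 = 0x00000003_00000000_00000000 */
	uint32_t a[3] = { 0xffffffff, 0xffffffff, 0x00000002 };
	uint32_t r[3];
	uint32_t carry = limb_add_1(r, a, 3, 1);

	printf("carry=%u limbs=%08x %08x %08x\n", carry, r[2], r[1], r[0]);
	return 0;
}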
| 1 1 24 24 24 24 1 1 1 1 21 2 21 1 1 22 22 22 6 6 1 6 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 | // SPDX-License-Identifier: GPL-2.0-only /* * Landlock - Ptrace and scope hooks * * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2019-2020 ANSSI * Copyright © 2024-2025 Microsoft Corporation */ #include <asm/current.h> #include <linux/cleanup.h> #include <linux/cred.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/lsm_audit.h> #include <linux/lsm_hooks.h> #include <linux/rcupdate.h> #include <linux/sched.h> #include <linux/sched/signal.h> #include <net/af_unix.h> #include <net/sock.h> #include "audit.h" #include "common.h" #include "cred.h" #include "domain.h" #include "fs.h" #include "ruleset.h" #include "setup.h" #include "task.h" /** * domain_scope_le - Checks domain ordering for scoped ptrace * * @parent: Parent domain. * @child: Potential child of @parent. * * Checks if the @parent domain is less or equal to (i.e. an ancestor, which * means a subset of) the @child domain. */ static bool domain_scope_le(const struct landlock_ruleset *const parent, const struct landlock_ruleset *const child) { const struct landlock_hierarchy *walker; /* Quick return for non-landlocked tasks. */ if (!parent) return true; if (!child) return false; for (walker = child->hierarchy; walker; walker = walker->parent) { if (walker == parent->hierarchy) /* @parent is in the scoped hierarchy of @child. */ return true; } /* There is no relationship between @parent and @child. */ return false; } static int domain_ptrace(const struct landlock_ruleset *const parent, const struct landlock_ruleset *const child) { if (domain_scope_le(parent, child)) return 0; return -EPERM; } /** * hook_ptrace_access_check - Determines whether the current process may access * another * * @child: Process to be accessed. * @mode: Mode of attachment. 
* * If the current task has Landlock rules, then the child must have at least * the same rules. Else denied. * * Determines whether a process may access another, returning 0 if permission * granted, -errno if denied. */ static int hook_ptrace_access_check(struct task_struct *const child, const unsigned int mode) { const struct landlock_cred_security *parent_subject; const struct landlock_ruleset *child_dom; int err; /* Quick return for non-landlocked tasks. */ parent_subject = landlock_cred(current_cred()); if (!parent_subject) return 0; scoped_guard(rcu) { child_dom = landlock_get_task_domain(child); err = domain_ptrace(parent_subject->domain, child_dom); } if (!err) return 0; /* * For the ptrace_access_check case, we log the current/parent domain * and the child task. */ if (!(mode & PTRACE_MODE_NOAUDIT)) landlock_log_denial(parent_subject, &(struct landlock_request) { .type = LANDLOCK_REQUEST_PTRACE, .audit = { .type = LSM_AUDIT_DATA_TASK, .u.tsk = child, }, .layer_plus_one = parent_subject->domain->num_layers, }); return err; } /** * hook_ptrace_traceme - Determines whether another process may trace the * current one * * @parent: Task proposed to be the tracer. * * If the parent has Landlock rules, then the current task must have the same * or more rules. Else denied. * * Determines whether the nominated task is permitted to trace the current * process, returning 0 if permission is granted, -errno if denied. */ static int hook_ptrace_traceme(struct task_struct *const parent) { const struct landlock_cred_security *parent_subject; const struct landlock_ruleset *child_dom; int err; child_dom = landlock_get_current_domain(); guard(rcu)(); parent_subject = landlock_cred(__task_cred(parent)); err = domain_ptrace(parent_subject->domain, child_dom); if (!err) return 0; /* * For the ptrace_traceme case, we log the domain which is the cause of * the denial, which means the parent domain instead of the current * domain. This may look unusual because the ptrace_traceme action is a * request to be traced, but the semantic is consistent with * hook_ptrace_access_check(). */ landlock_log_denial(parent_subject, &(struct landlock_request) { .type = LANDLOCK_REQUEST_PTRACE, .audit = { .type = LSM_AUDIT_DATA_TASK, .u.tsk = current, }, .layer_plus_one = parent_subject->domain->num_layers, }); return err; } /** * domain_is_scoped - Checks if the client domain is scoped in the same * domain as the server. * * @client: IPC sender domain. * @server: IPC receiver domain. * @scope: The scope restriction criteria. * * Returns: True if the @client domain is scoped to access the @server, * unless the @server is also scoped in the same domain as @client. */ static bool domain_is_scoped(const struct landlock_ruleset *const client, const struct landlock_ruleset *const server, access_mask_t scope) { int client_layer, server_layer; const struct landlock_hierarchy *client_walker, *server_walker; /* Quick return if client has no domain */ if (WARN_ON_ONCE(!client)) return false; client_layer = client->num_layers - 1; client_walker = client->hierarchy; /* * client_layer must be a signed integer with greater capacity * than client->num_layers to ensure the following loop stops. */ BUILD_BUG_ON(sizeof(client_layer) > sizeof(client->num_layers)); server_layer = server ? (server->num_layers - 1) : -1; server_walker = server ? server->hierarchy : NULL; /* * Walks client's parent domains down to the same hierarchy level * as the server's domain, and checks that none of these client's * parent domains are scoped. 
*/ for (; client_layer > server_layer; client_layer--) { if (landlock_get_scope_mask(client, client_layer) & scope) return true; client_walker = client_walker->parent; } /* * Walks server's parent domains down to the same hierarchy level as * the client's domain. */ for (; server_layer > client_layer; server_layer--) server_walker = server_walker->parent; for (; client_layer >= 0; client_layer--) { if (landlock_get_scope_mask(client, client_layer) & scope) { /* * Client and server are at the same level in the * hierarchy. If the client is scoped, the request is * only allowed if this domain is also a server's * ancestor. */ return server_walker != client_walker; } client_walker = client_walker->parent; server_walker = server_walker->parent; } return false; } static bool sock_is_scoped(struct sock *const other, const struct landlock_ruleset *const domain) { const struct landlock_ruleset *dom_other; /* The credentials will not change. */ lockdep_assert_held(&unix_sk(other)->lock); dom_other = landlock_cred(other->sk_socket->file->f_cred)->domain; return domain_is_scoped(domain, dom_other, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET); } static bool is_abstract_socket(struct sock *const sock) { struct unix_address *addr = unix_sk(sock)->addr; if (!addr) return false; if (addr->len >= offsetof(struct sockaddr_un, sun_path) + 1 && addr->name->sun_path[0] == '\0') return true; return false; } static const struct access_masks unix_scope = { .scope = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET, }; static int hook_unix_stream_connect(struct sock *const sock, struct sock *const other, struct sock *const newsk) { size_t handle_layer; const struct landlock_cred_security *const subject = landlock_get_applicable_subject(current_cred(), unix_scope, &handle_layer); /* Quick return for non-landlocked tasks. */ if (!subject) return 0; if (!is_abstract_socket(other)) return 0; if (!sock_is_scoped(other, subject->domain)) return 0; landlock_log_denial(subject, &(struct landlock_request) { .type = LANDLOCK_REQUEST_SCOPE_ABSTRACT_UNIX_SOCKET, .audit = { .type = LSM_AUDIT_DATA_NET, .u.net = &(struct lsm_network_audit) { .sk = other, }, }, .layer_plus_one = handle_layer + 1, }); return -EPERM; } static int hook_unix_may_send(struct socket *const sock, struct socket *const other) { size_t handle_layer; const struct landlock_cred_security *const subject = landlock_get_applicable_subject(current_cred(), unix_scope, &handle_layer); if (!subject) return 0; /* * Checks if this datagram socket was already allowed to be connected * to other. */ if (unix_peer(sock->sk) == other->sk) return 0; if (!is_abstract_socket(other->sk)) return 0; if (!sock_is_scoped(other->sk, subject->domain)) return 0; landlock_log_denial(subject, &(struct landlock_request) { .type = LANDLOCK_REQUEST_SCOPE_ABSTRACT_UNIX_SOCKET, .audit = { .type = LSM_AUDIT_DATA_NET, .u.net = &(struct lsm_network_audit) { .sk = other->sk, }, }, .layer_plus_one = handle_layer + 1, }); return -EPERM; } static const struct access_masks signal_scope = { .scope = LANDLOCK_SCOPE_SIGNAL, }; static int hook_task_kill(struct task_struct *const p, struct kernel_siginfo *const info, const int sig, const struct cred *cred) { bool is_scoped; size_t handle_layer; const struct landlock_cred_security *subject; if (!cred) { /* * Always allow sending signals between threads of the same process. * This is required for process credential changes by the Native POSIX * Threads Library and implemented by the set*id(2) wrappers and * libcap(3) with tgkill(2). See nptl(7) and libpsx(3). 
* * This exception is similar to the __ptrace_may_access() one. */ if (same_thread_group(p, current)) return 0; /* Not dealing with USB IO. */ cred = current_cred(); } subject = landlock_get_applicable_subject(cred, signal_scope, &handle_layer); /* Quick return for non-landlocked tasks. */ if (!subject) return 0; scoped_guard(rcu) { is_scoped = domain_is_scoped(subject->domain, landlock_get_task_domain(p), signal_scope.scope); } if (!is_scoped) return 0; landlock_log_denial(subject, &(struct landlock_request) { .type = LANDLOCK_REQUEST_SCOPE_SIGNAL, .audit = { .type = LSM_AUDIT_DATA_TASK, .u.tsk = p, }, .layer_plus_one = handle_layer + 1, }); return -EPERM; } static int hook_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int signum) { const struct landlock_cred_security *subject; bool is_scoped = false; /* Lock already held by send_sigio() and send_sigurg(). */ lockdep_assert_held(&fown->lock); subject = &landlock_file(fown->file)->fown_subject; /* * Quick return for unowned socket. * * subject->domain has already been filtered when saved by * hook_file_set_fowner(), so there is no need to call * landlock_get_applicable_subject() here. */ if (!subject->domain) return 0; scoped_guard(rcu) { is_scoped = domain_is_scoped(subject->domain, landlock_get_task_domain(tsk), signal_scope.scope); } if (!is_scoped) return 0; landlock_log_denial(subject, &(struct landlock_request) { .type = LANDLOCK_REQUEST_SCOPE_SIGNAL, .audit = { .type = LSM_AUDIT_DATA_TASK, .u.tsk = tsk, }, #ifdef CONFIG_AUDIT .layer_plus_one = landlock_file(fown->file)->fown_layer + 1, #endif /* CONFIG_AUDIT */ }); return -EPERM; } static struct security_hook_list landlock_hooks[] __ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, hook_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, hook_ptrace_traceme), LSM_HOOK_INIT(unix_stream_connect, hook_unix_stream_connect), LSM_HOOK_INIT(unix_may_send, hook_unix_may_send), LSM_HOOK_INIT(task_kill, hook_task_kill), LSM_HOOK_INIT(file_send_sigiotask, hook_file_send_sigiotask), }; __init void landlock_add_task_hooks(void) { security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks), &landlock_lsmid); } |
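/*
 * Illustrative sketch, not part of task.c: a userspace program opting into
 * the signal and abstract unix socket scopes that the hooks above enforce.
 * It assumes uapi headers exposing Landlock ABI v6 (the "scoped" field of
 * struct landlock_ruleset_attr); error handling is minimal.
 */
#define _GNU_SOURCE
#include <linux/landlock.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct landlock_ruleset_attr attr = {
		.scoped = LANDLOCK_SCOPE_SIGNAL |
			  LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
	};
	int ruleset_fd;

	ruleset_fd = syscall(__NR_landlock_create_ruleset, &attr,
			     sizeof(attr), 0);
	if (ruleset_fd < 0) {
		perror("landlock_create_ruleset");
		return 1;
	}

	/* Required before restricting the current task. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
	    syscall(__NR_landlock_restrict_self, ruleset_fd, 0)) {
		perror("landlock_restrict_self");
		return 1;
	}

	/*
	 * From here on, hook_task_kill() and the unix socket hooks above deny
	 * signals and abstract socket connections to targets outside this
	 * scoped domain.
	 */
	printf("scoped Landlock domain enabled\n");
	return 0;
}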
// SPDX-License-Identifier: GPL-2.0-only /* * xfrm_replay.c - xfrm replay detection, derived from xfrm_state.c.
* * Copyright (C) 2010 secunet Security Networks AG * Copyright (C) 2010 Steffen Klassert <steffen.klassert@secunet.com> */ #include <linux/export.h> #include <net/xfrm.h> u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq) { u32 seq, seq_hi, bottom; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; if (!(x->props.flags & XFRM_STATE_ESN)) return 0; seq = ntohl(net_seq); seq_hi = replay_esn->seq_hi; bottom = replay_esn->seq - replay_esn->replay_window + 1; if (likely(replay_esn->seq >= replay_esn->replay_window - 1)) { /* A. same subspace */ if (unlikely(seq < bottom)) seq_hi++; } else { /* B. window spans two subspaces */ if (unlikely(seq >= bottom)) seq_hi--; } return seq_hi; } EXPORT_SYMBOL(xfrm_replay_seqhi); static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event); static void xfrm_replay_notify_esn(struct xfrm_state *x, int event); void xfrm_replay_notify(struct xfrm_state *x, int event) { struct km_event c; /* we send notify messages in case * 1. we updated on of the sequence numbers, and the seqno difference * is at least x->replay_maxdiff, in this case we also update the * timeout of our timer function * 2. if x->replay_maxage has elapsed since last update, * and there were changes * * The state structure must be locked! */ switch (x->repl_mode) { case XFRM_REPLAY_MODE_LEGACY: break; case XFRM_REPLAY_MODE_BMP: xfrm_replay_notify_bmp(x, event); return; case XFRM_REPLAY_MODE_ESN: xfrm_replay_notify_esn(x, event); return; } switch (event) { case XFRM_REPLAY_UPDATE: if (!x->replay_maxdiff || ((x->replay.seq - x->preplay.seq < x->replay_maxdiff) && (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff))) { if (x->xflags & XFRM_TIME_DEFER) event = XFRM_REPLAY_TIMEOUT; else return; } break; case XFRM_REPLAY_TIMEOUT: if (memcmp(&x->replay, &x->preplay, sizeof(struct xfrm_replay_state)) == 0) { x->xflags |= XFRM_TIME_DEFER; return; } break; } memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state)); c.event = XFRM_MSG_NEWAE; c.data.aevent = event; km_state_notify(x, &c); if (x->replay_maxage && !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) x->xflags &= ~XFRM_TIME_DEFER; } static int __xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; struct net *net = xs_net(x); if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq; XFRM_SKB_CB(skb)->seq.output.hi = 0; if (unlikely(x->replay.oseq == 0) && !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) { x->replay.oseq--; xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; return err; } if (xfrm_aevent_is_on(net)) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; } static int xfrm_replay_check_legacy(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq) { u32 diff; u32 seq = ntohl(net_seq); if (!x->props.replay_window) return 0; if (unlikely(seq == 0)) goto err; if (likely(seq > x->replay.seq)) return 0; diff = x->replay.seq - seq; if (diff >= x->props.replay_window) { x->stats.replay_window++; goto err; } if (x->replay.bitmap & (1U << diff)) { x->stats.replay++; goto err; } return 0; err: xfrm_audit_state_replay(x, skb, net_seq); return -EINVAL; } static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq); static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq); void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) { u32 diff, seq; switch (x->repl_mode) { case XFRM_REPLAY_MODE_LEGACY: break; case XFRM_REPLAY_MODE_BMP: return xfrm_replay_advance_bmp(x, net_seq); case 
XFRM_REPLAY_MODE_ESN: return xfrm_replay_advance_esn(x, net_seq); } if (!x->props.replay_window) return; seq = ntohl(net_seq); if (seq > x->replay.seq) { diff = seq - x->replay.seq; if (diff < x->props.replay_window) x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; else x->replay.bitmap = 1; x->replay.seq = seq; } else { diff = x->replay.seq - seq; x->replay.bitmap |= (1U << diff); } if (xfrm_aevent_is_on(xs_net(x))) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; struct net *net = xs_net(x); if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; XFRM_SKB_CB(skb)->seq.output.hi = 0; if (unlikely(replay_esn->oseq == 0) && !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) { replay_esn->oseq--; xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; return err; } if (xfrm_aevent_is_on(net)) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; } static int xfrm_replay_check_bmp(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq) { unsigned int bitnr, nr; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; u32 pos; u32 seq = ntohl(net_seq); u32 diff = replay_esn->seq - seq; if (!replay_esn->replay_window) return 0; if (unlikely(seq == 0)) goto err; if (likely(seq > replay_esn->seq)) return 0; if (diff >= replay_esn->replay_window) { x->stats.replay_window++; goto err; } pos = (replay_esn->seq - 1) % replay_esn->replay_window; if (pos >= diff) bitnr = (pos - diff) % replay_esn->replay_window; else bitnr = replay_esn->replay_window - (diff - pos); nr = bitnr >> 5; bitnr = bitnr & 0x1F; if (replay_esn->bmp[nr] & (1U << bitnr)) goto err_replay; return 0; err_replay: x->stats.replay++; err: xfrm_audit_state_replay(x, skb, net_seq); return -EINVAL; } static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) { unsigned int bitnr, nr, i; u32 diff; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; u32 seq = ntohl(net_seq); u32 pos; if (!replay_esn->replay_window) return; pos = (replay_esn->seq - 1) % replay_esn->replay_window; if (seq > replay_esn->seq) { diff = seq - replay_esn->seq; if (diff < replay_esn->replay_window) { for (i = 1; i < diff; i++) { bitnr = (pos + i) % replay_esn->replay_window; nr = bitnr >> 5; bitnr = bitnr & 0x1F; replay_esn->bmp[nr] &= ~(1U << bitnr); } } else { nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; } bitnr = (pos + diff) % replay_esn->replay_window; replay_esn->seq = seq; } else { diff = replay_esn->seq - seq; if (pos >= diff) bitnr = (pos - diff) % replay_esn->replay_window; else bitnr = replay_esn->replay_window - (diff - pos); } nr = bitnr >> 5; bitnr = bitnr & 0x1F; replay_esn->bmp[nr] |= (1U << bitnr); if (xfrm_aevent_is_on(xs_net(x))) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) { struct km_event c; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; /* we send notify messages in case * 1. we updated on of the sequence numbers, and the seqno difference * is at least x->replay_maxdiff, in this case we also update the * timeout of our timer function * 2. if x->replay_maxage has elapsed since last update, * and there were changes * * The state structure must be locked! 
*/ switch (event) { case XFRM_REPLAY_UPDATE: if (!x->replay_maxdiff || ((replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) && (replay_esn->oseq - preplay_esn->oseq < x->replay_maxdiff))) { if (x->xflags & XFRM_TIME_DEFER) event = XFRM_REPLAY_TIMEOUT; else return; } break; case XFRM_REPLAY_TIMEOUT: if (memcmp(x->replay_esn, x->preplay_esn, xfrm_replay_state_esn_len(replay_esn)) == 0) { x->xflags |= XFRM_TIME_DEFER; return; } break; } memcpy(x->preplay_esn, x->replay_esn, xfrm_replay_state_esn_len(replay_esn)); c.event = XFRM_MSG_NEWAE; c.data.aevent = event; km_state_notify(x, &c); if (x->replay_maxage && !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) x->xflags &= ~XFRM_TIME_DEFER; } static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) { u32 seq_diff, oseq_diff; struct km_event c; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; /* we send notify messages in case * 1. we updated on of the sequence numbers, and the seqno difference * is at least x->replay_maxdiff, in this case we also update the * timeout of our timer function * 2. if x->replay_maxage has elapsed since last update, * and there were changes * * The state structure must be locked! */ switch (event) { case XFRM_REPLAY_UPDATE: if (x->replay_maxdiff) { if (replay_esn->seq_hi == preplay_esn->seq_hi) seq_diff = replay_esn->seq - preplay_esn->seq; else seq_diff = ~preplay_esn->seq + replay_esn->seq + 1; if (replay_esn->oseq_hi == preplay_esn->oseq_hi) oseq_diff = replay_esn->oseq - preplay_esn->oseq; else oseq_diff = ~preplay_esn->oseq + replay_esn->oseq + 1; if (seq_diff >= x->replay_maxdiff || oseq_diff >= x->replay_maxdiff) break; } if (x->xflags & XFRM_TIME_DEFER) event = XFRM_REPLAY_TIMEOUT; else return; break; case XFRM_REPLAY_TIMEOUT: if (memcmp(x->replay_esn, x->preplay_esn, xfrm_replay_state_esn_len(replay_esn)) == 0) { x->xflags |= XFRM_TIME_DEFER; return; } break; } memcpy(x->preplay_esn, x->replay_esn, xfrm_replay_state_esn_len(replay_esn)); c.event = XFRM_MSG_NEWAE; c.data.aevent = event; km_state_notify(x, &c); if (x->replay_maxage && !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) x->xflags &= ~XFRM_TIME_DEFER; } static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; struct net *net = xs_net(x); if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; XFRM_SKB_CB(skb)->seq.output.hi = replay_esn->oseq_hi; if (unlikely(replay_esn->oseq == 0)) { XFRM_SKB_CB(skb)->seq.output.hi = ++replay_esn->oseq_hi; if (replay_esn->oseq_hi == 0) { replay_esn->oseq--; replay_esn->oseq_hi--; xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; return err; } } if (xfrm_aevent_is_on(net)) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; } static int xfrm_replay_check_esn(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq) { unsigned int bitnr, nr; u32 diff; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; u32 pos; u32 seq = ntohl(net_seq); u32 wsize = replay_esn->replay_window; u32 top = replay_esn->seq; u32 bottom = top - wsize + 1; if (!wsize) return 0; if (unlikely(seq == 0 && replay_esn->seq_hi == 0 && (replay_esn->seq < replay_esn->replay_window - 1))) goto err; diff = top - seq; if (likely(top >= wsize - 1)) { /* A. same subspace */ if (likely(seq > top) || seq < bottom) return 0; } else { /* B. 
window spans two subspaces */ if (likely(seq > top && seq < bottom)) return 0; if (seq >= bottom) diff = ~seq + top + 1; } if (diff >= replay_esn->replay_window) { x->stats.replay_window++; goto err; } pos = (replay_esn->seq - 1) % replay_esn->replay_window; if (pos >= diff) bitnr = (pos - diff) % replay_esn->replay_window; else bitnr = replay_esn->replay_window - (diff - pos); nr = bitnr >> 5; bitnr = bitnr & 0x1F; if (replay_esn->bmp[nr] & (1U << bitnr)) goto err_replay; return 0; err_replay: x->stats.replay++; err: xfrm_audit_state_replay(x, skb, net_seq); return -EINVAL; } int xfrm_replay_check(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq) { switch (x->repl_mode) { case XFRM_REPLAY_MODE_LEGACY: break; case XFRM_REPLAY_MODE_BMP: return xfrm_replay_check_bmp(x, skb, net_seq); case XFRM_REPLAY_MODE_ESN: return xfrm_replay_check_esn(x, skb, net_seq); } return xfrm_replay_check_legacy(x, skb, net_seq); } static int xfrm_replay_recheck_esn(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq) { if (unlikely(XFRM_SKB_CB(skb)->seq.input.hi != htonl(xfrm_replay_seqhi(x, net_seq)))) { x->stats.replay_window++; return -EINVAL; } return xfrm_replay_check_esn(x, skb, net_seq); } int xfrm_replay_recheck(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq) { switch (x->repl_mode) { case XFRM_REPLAY_MODE_LEGACY: break; case XFRM_REPLAY_MODE_BMP: /* no special recheck treatment */ return xfrm_replay_check_bmp(x, skb, net_seq); case XFRM_REPLAY_MODE_ESN: return xfrm_replay_recheck_esn(x, skb, net_seq); } return xfrm_replay_check_legacy(x, skb, net_seq); } static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) { unsigned int bitnr, nr, i; int wrap; u32 diff, pos, seq, seq_hi; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; if (!replay_esn->replay_window) return; seq = ntohl(net_seq); pos = (replay_esn->seq - 1) % replay_esn->replay_window; seq_hi = xfrm_replay_seqhi(x, net_seq); wrap = seq_hi - replay_esn->seq_hi; if ((!wrap && seq > replay_esn->seq) || wrap > 0) { if (likely(!wrap)) diff = seq - replay_esn->seq; else diff = ~replay_esn->seq + seq + 1; if (diff < replay_esn->replay_window) { for (i = 1; i < diff; i++) { bitnr = (pos + i) % replay_esn->replay_window; nr = bitnr >> 5; bitnr = bitnr & 0x1F; replay_esn->bmp[nr] &= ~(1U << bitnr); } } else { nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; } bitnr = (pos + diff) % replay_esn->replay_window; replay_esn->seq = seq; if (unlikely(wrap > 0)) replay_esn->seq_hi++; } else { diff = replay_esn->seq - seq; if (pos >= diff) bitnr = (pos - diff) % replay_esn->replay_window; else bitnr = replay_esn->replay_window - (diff - pos); } xfrm_dev_state_advance_esn(x); nr = bitnr >> 5; bitnr = bitnr & 0x1F; replay_esn->bmp[nr] |= (1U << bitnr); if (xfrm_aevent_is_on(xs_net(x))) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } #ifdef CONFIG_XFRM_OFFLOAD static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; struct net *net = xs_net(x); struct xfrm_offload *xo = xfrm_offload(skb); __u32 oseq = x->replay.oseq; if (!xo) return __xfrm_replay_overflow(x, skb); if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { if (!skb_is_gso(skb)) { XFRM_SKB_CB(skb)->seq.output.low = ++oseq; xo->seq.low = oseq; } else { XFRM_SKB_CB(skb)->seq.output.low = oseq + 1; xo->seq.low = oseq + 1; oseq += skb_shinfo(skb)->gso_segs; } XFRM_SKB_CB(skb)->seq.output.hi = 0; xo->seq.hi = 0; if (unlikely(oseq < x->replay.oseq) && !(x->props.extra_flags & 
XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) { xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; return err; } x->replay.oseq = oseq; if (xfrm_aevent_is_on(net)) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; } static int xfrm_replay_overflow_offload_bmp(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; struct xfrm_offload *xo = xfrm_offload(skb); struct xfrm_replay_state_esn *replay_esn = x->replay_esn; struct net *net = xs_net(x); __u32 oseq = replay_esn->oseq; if (!xo) return xfrm_replay_overflow_bmp(x, skb); if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { if (!skb_is_gso(skb)) { XFRM_SKB_CB(skb)->seq.output.low = ++oseq; xo->seq.low = oseq; } else { XFRM_SKB_CB(skb)->seq.output.low = oseq + 1; xo->seq.low = oseq + 1; oseq += skb_shinfo(skb)->gso_segs; } XFRM_SKB_CB(skb)->seq.output.hi = 0; xo->seq.hi = 0; if (unlikely(oseq < replay_esn->oseq) && !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) { xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; return err; } else { replay_esn->oseq = oseq; } if (xfrm_aevent_is_on(net)) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; } static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; struct xfrm_offload *xo = xfrm_offload(skb); struct xfrm_replay_state_esn *replay_esn = x->replay_esn; struct net *net = xs_net(x); __u32 oseq = replay_esn->oseq; __u32 oseq_hi = replay_esn->oseq_hi; if (!xo) return xfrm_replay_overflow_esn(x, skb); if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { if (!skb_is_gso(skb)) { XFRM_SKB_CB(skb)->seq.output.low = ++oseq; XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; xo->seq.low = oseq; xo->seq.hi = oseq_hi; } else { XFRM_SKB_CB(skb)->seq.output.low = oseq + 1; XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; xo->seq.low = oseq + 1; xo->seq.hi = oseq_hi; oseq += skb_shinfo(skb)->gso_segs; } if (unlikely(oseq < replay_esn->oseq)) { replay_esn->oseq_hi = ++oseq_hi; if (xo->seq.low < replay_esn->oseq) { XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; xo->seq.hi = oseq_hi; } if (replay_esn->oseq_hi == 0) { replay_esn->oseq--; replay_esn->oseq_hi--; xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; return err; } } replay_esn->oseq = oseq; xfrm_dev_state_advance_esn(x); if (xfrm_aevent_is_on(net)) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; } int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) { switch (x->repl_mode) { case XFRM_REPLAY_MODE_LEGACY: break; case XFRM_REPLAY_MODE_BMP: return xfrm_replay_overflow_offload_bmp(x, skb); case XFRM_REPLAY_MODE_ESN: return xfrm_replay_overflow_offload_esn(x, skb); } return xfrm_replay_overflow_offload(x, skb); } #else int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) { switch (x->repl_mode) { case XFRM_REPLAY_MODE_LEGACY: break; case XFRM_REPLAY_MODE_BMP: return xfrm_replay_overflow_bmp(x, skb); case XFRM_REPLAY_MODE_ESN: return xfrm_replay_overflow_esn(x, skb); } return __xfrm_replay_overflow(x, skb); } #endif int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct xfrm_replay_state_esn *replay_esn = x->replay_esn; if (replay_esn) { if (replay_esn->replay_window > replay_esn->bmp_len * sizeof(__u32) * 8) { NL_SET_ERR_MSG(extack, "ESN replay window is too large for the chosen bitmap size"); return -EINVAL; } if (x->props.flags & XFRM_STATE_ESN) { if (replay_esn->replay_window == 0 && (!x->dir || x->dir == XFRM_SA_DIR_IN)) { NL_SET_ERR_MSG(extack, "ESN replay window must be > 0"); return -EINVAL; } x->repl_mode = XFRM_REPLAY_MODE_ESN; } else { 
x->repl_mode = XFRM_REPLAY_MODE_BMP; } } else { x->repl_mode = XFRM_REPLAY_MODE_LEGACY; } return 0; } EXPORT_SYMBOL(xfrm_init_replay); |
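/*
 * Illustrative sketch, not part of xfrm_replay.c: a stand-alone model of the
 * legacy 32-bit anti-replay window used by xfrm_replay_check_legacy() and
 * xfrm_replay_advance() above.  Sequence numbers newer than the highest seen
 * slide the bitmap left; older ones are looked up (and set) in the bitmap;
 * anything outside the window or already seen is rejected.  Check and
 * advance are combined into one helper here for brevity, and the names and
 * sequence numbers are arbitrary.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct replay_window {
	uint32_t seq;     /* highest sequence number accepted so far */
	uint32_t bitmap;  /* bit i set => (seq - i) was seen */
	uint32_t window;  /* window size, at most 32 */
};

static bool replay_check_and_advance(struct replay_window *w, uint32_t seq)
{
	uint32_t diff;

	if (seq == 0)
		return false;

	if (seq > w->seq) {
		/* New highest sequence number: slide the window. */
		diff = seq - w->seq;
		w->bitmap = (diff < w->window) ? (w->bitmap << diff) | 1 : 1;
		w->seq = seq;
		return true;
	}

	diff = w->seq - seq;
	if (diff >= w->window)
		return false;		/* older than the window */
	if (w->bitmap & (1U << diff))
		return false;		/* replayed */
	w->bitmap |= 1U << diff;
	return true;
}

int main(void)
{
	struct replay_window w = { .seq = 0, .bitmap = 0, .window = 32 };
	uint32_t seqs[] = { 1, 3, 2, 3, 40, 5 };
	unsigned int i;

	for (i = 0; i < sizeof(seqs) / sizeof(seqs[0]); i++)
		printf("seq %u: %s\n", seqs[i],
		       replay_check_and_advance(&w, seqs[i]) ? "ok" : "drop");
	return 0;
}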
// SPDX-License-Identifier: GPL-2.0+ /* * Driver for USB Mass Storage compliant devices * * Current development and maintenance by: * (c) 1999-2003 Matthew Dharm (mdharm-usb@one-eyed-alien.net) * * Developed with the assistance of: * (c) 2000 David L. Brown, Jr. (usb-storage@davidb.org) * (c) 2003-2009 Alan Stern (stern@rowland.harvard.edu) * * Initial work by: * (c) 1999 Michael Gee (michael@linuxspecific.com) * * usb_device_id support by Adam J. Richter (adam@yggdrasil.com): * (c) 2000 Yggdrasil Computing, Inc. * * This driver is based on the 'USB Mass Storage Class' document. This * describes in detail the protocol used to communicate with such * devices. Clearly, the designers had SCSI and ATAPI commands in * mind when they created this document. The commands are all very * similar to commands in the SCSI-II and ATAPI specifications. * * It is important to note that in a number of cases this class * exhibits class-specific exemptions from the USB specification. * Notably the usage of NAK, STALL and ACK differs from the norm, in * that they are used to communicate wait, failed and OK on commands. * * Also, for certain devices, the interrupt endpoint is used to convey * status of a command.
 */

#ifdef CONFIG_USB_STORAGE_DEBUG
#define DEBUG
#endif

#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/utsname.h>

#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>

#include "usb.h"
#include <linux/usb/hcd.h>
#include "scsiglue.h"
#include "transport.h"
#include "protocol.h"
#include "debug.h"
#include "initializers.h"

#include "sierra_ms.h"
#include "option_ms.h"

#if IS_ENABLED(CONFIG_USB_UAS)
#include "uas-detect.h"
#endif

#define DRV_NAME "usb-storage"

/* Some informational data */
MODULE_AUTHOR("Matthew Dharm <mdharm-usb@one-eyed-alien.net>");
MODULE_DESCRIPTION("USB Mass Storage driver for Linux");
MODULE_LICENSE("GPL");

static unsigned int delay_use = 1 * MSEC_PER_SEC;

/**
 * parse_delay_str - parse an unsigned decimal integer delay
 * @str: String to parse.
 * @ndecimals: Number of decimal places to scale up by.
 * @suffix: Suffix string to parse.
 * @val: Where to store the parsed value.
 *
 * Parse an unsigned decimal value in @str, optionally ending with @suffix.
 * The parsed value is stored in @val as-is if @str ends with @suffix;
 * otherwise it is stored scaled up by 10^(@ndecimals).
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int parse_delay_str(const char *str, int ndecimals, const char *suffix,
			   unsigned int *val)
{
	int n, n2, l;
	char buf[16];

	l = strlen(suffix);
	n = strlen(str);
	if (n > 0 && str[n - 1] == '\n')
		--n;
	if (n >= l && !strncmp(&str[n - l], suffix, l)) {
		n -= l;
		n2 = 0;
	} else
		n2 = ndecimals;

	if (n + n2 > sizeof(buf) - 1)
		return -EINVAL;

	memcpy(buf, str, n);
	while (n2-- > 0)
		buf[n++] = '0';
	buf[n] = 0;
	return kstrtouint(buf, 10, val);
}

/**
 * format_delay_ms - format an integer value into a delay string
 * @val: The integer value to format, scaled by 10^(@ndecimals).
 * @ndecimals: Number of decimal places to scale down by.
 * @suffix: Suffix string to format.
 * @str: Where to store the formatted string.
 * @size: The size of buffer for @str.
 *
 * Format the value in @val scaled down by 10^(@ndecimals), without @suffix,
 * if @val is divisible by 10^(@ndecimals); otherwise format the value in
 * @val as-is, with @suffix appended.
 *
 * Returns the number of characters written into @str.
*/ static int format_delay_ms(unsigned int val, int ndecimals, const char *suffix, char *str, int size) { u64 delay_ms = val; unsigned int rem = do_div(delay_ms, int_pow(10, ndecimals)); int ret; if (rem) ret = scnprintf(str, size, "%u%s\n", val, suffix); else ret = scnprintf(str, size, "%u\n", (unsigned int)delay_ms); return ret; } static int delay_use_set(const char *s, const struct kernel_param *kp) { unsigned int delay_ms; int ret; ret = parse_delay_str(skip_spaces(s), 3, "ms", &delay_ms); if (ret < 0) return ret; *((unsigned int *)kp->arg) = delay_ms; return 0; } static int delay_use_get(char *s, const struct kernel_param *kp) { unsigned int delay_ms = *((unsigned int *)kp->arg); return format_delay_ms(delay_ms, 3, "ms", s, PAGE_SIZE); } static const struct kernel_param_ops delay_use_ops = { .set = delay_use_set, .get = delay_use_get, }; module_param_cb(delay_use, &delay_use_ops, &delay_use, 0644); MODULE_PARM_DESC(delay_use, "time to delay before using a new device"); static char quirks[128]; module_param_string(quirks, quirks, sizeof(quirks), S_IRUGO | S_IWUSR); MODULE_PARM_DESC(quirks, "supplemental list of device IDs and their quirks"); /* * The entries in this table correspond, line for line, * with the entries in usb_storage_usb_ids[], defined in usual-tables.c. */ /* *The vendor name should be kept at eight characters or less, and * the product name should be kept at 16 characters or less. If a device * has the US_FL_FIX_INQUIRY flag, then the vendor and product names * normally generated by a device through the INQUIRY response will be * taken from this list, and this is the reason for the above size * restriction. However, if the flag is not present, then you * are free to use as many characters as you like. */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } #define COMPLIANT_DEV UNUSUAL_DEV #define USUAL_DEV(use_protocol, use_transport) \ { \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ } static const struct us_unusual_dev us_unusual_dev_list[] = { # include "unusual_devs.h" { } /* Terminating entry */ }; static const struct us_unusual_dev for_dynamic_ids = USUAL_DEV(USB_SC_SCSI, USB_PR_BULK); #undef UNUSUAL_DEV #undef COMPLIANT_DEV #undef USUAL_DEV #ifdef CONFIG_LOCKDEP static struct lock_class_key us_interface_key[USB_MAXINTERFACES]; static void us_set_lock_class(struct mutex *mutex, struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_host_config *config = udev->actconfig; int i; for (i = 0; i < config->desc.bNumInterfaces; i++) { if (config->interface[i] == intf) break; } BUG_ON(i == config->desc.bNumInterfaces); lockdep_set_class(mutex, &us_interface_key[i]); } #else static void us_set_lock_class(struct mutex *mutex, struct usb_interface *intf) { } #endif #ifdef CONFIG_PM /* Minimal support for suspend and resume */ int usb_stor_suspend(struct usb_interface *iface, pm_message_t message) { struct us_data *us = usb_get_intfdata(iface); /* Wait until no command is running */ mutex_lock(&us->dev_mutex); if (us->suspend_resume_hook) (us->suspend_resume_hook)(us, US_SUSPEND); /* * When runtime PM is working, we'll set a flag to indicate * whether we should autoresume when a SCSI request arrives. 
*/ mutex_unlock(&us->dev_mutex); return 0; } EXPORT_SYMBOL_GPL(usb_stor_suspend); int usb_stor_resume(struct usb_interface *iface) { struct us_data *us = usb_get_intfdata(iface); mutex_lock(&us->dev_mutex); if (us->suspend_resume_hook) (us->suspend_resume_hook)(us, US_RESUME); mutex_unlock(&us->dev_mutex); return 0; } EXPORT_SYMBOL_GPL(usb_stor_resume); int usb_stor_reset_resume(struct usb_interface *iface) { struct us_data *us = usb_get_intfdata(iface); /* Report the reset to the SCSI core */ usb_stor_report_bus_reset(us); /* * If any of the subdrivers implemented a reinitialization scheme, * this is where the callback would be invoked. */ return 0; } EXPORT_SYMBOL_GPL(usb_stor_reset_resume); #endif /* CONFIG_PM */ /* * The next two routines get called just before and just after * a USB port reset, whether from this driver or a different one. */ int usb_stor_pre_reset(struct usb_interface *iface) { struct us_data *us = usb_get_intfdata(iface); /* Make sure no command runs during the reset */ mutex_lock(&us->dev_mutex); return 0; } EXPORT_SYMBOL_GPL(usb_stor_pre_reset); int usb_stor_post_reset(struct usb_interface *iface) { struct us_data *us = usb_get_intfdata(iface); /* Report the reset to the SCSI core */ usb_stor_report_bus_reset(us); /* * If any of the subdrivers implemented a reinitialization scheme, * this is where the callback would be invoked. */ mutex_unlock(&us->dev_mutex); return 0; } EXPORT_SYMBOL_GPL(usb_stor_post_reset); /* * fill_inquiry_response takes an unsigned char array (which must * be at least 36 characters) and populates the vendor name, * product name, and revision fields. Then the array is copied * into the SCSI command's response buffer (oddly enough * called request_buffer). data_len contains the length of the * data array, which again must be at least 36. */ void fill_inquiry_response(struct us_data *us, unsigned char *data, unsigned int data_len) { if (data_len < 36) /* You lose. */ return; memset(data+8, ' ', 28); if (data[0]&0x20) { /* * USB device currently not connected. Return * peripheral qualifier 001b ("...however, the * physical device is not currently connected * to this logical unit") and leave vendor and * product identification empty. ("If the target * does store some of the INQUIRY data on the * device, it may return zeros or ASCII spaces * (20h) in those fields until the data is * available from the device."). 
*/ } else { u16 bcdDevice = le16_to_cpu(us->pusb_dev->descriptor.bcdDevice); int n; n = strlen(us->unusual_dev->vendorName); memcpy(data+8, us->unusual_dev->vendorName, min(8, n)); n = strlen(us->unusual_dev->productName); memcpy(data+16, us->unusual_dev->productName, min(16, n)); data[32] = 0x30 + ((bcdDevice>>12) & 0x0F); data[33] = 0x30 + ((bcdDevice>>8) & 0x0F); data[34] = 0x30 + ((bcdDevice>>4) & 0x0F); data[35] = 0x30 + ((bcdDevice) & 0x0F); } usb_stor_set_xfer_buf(data, data_len, us->srb); } EXPORT_SYMBOL_GPL(fill_inquiry_response); static int usb_stor_control_thread(void * __us) { struct us_data *us = (struct us_data *)__us; struct Scsi_Host *host = us_to_host(us); struct scsi_cmnd *srb; for (;;) { usb_stor_dbg(us, "*** thread sleeping\n"); if (wait_for_completion_interruptible(&us->cmnd_ready)) break; usb_stor_dbg(us, "*** thread awakened\n"); /* lock the device pointers */ mutex_lock(&(us->dev_mutex)); /* lock access to the state */ scsi_lock(host); /* When we are called with no command pending, we're done */ srb = us->srb; if (srb == NULL) { scsi_unlock(host); mutex_unlock(&us->dev_mutex); usb_stor_dbg(us, "-- exiting\n"); break; } /* has the command timed out *already* ? */ if (test_bit(US_FLIDX_TIMED_OUT, &us->dflags)) { srb->result = DID_ABORT << 16; goto SkipForAbort; } scsi_unlock(host); /* * reject the command if the direction indicator * is UNKNOWN */ if (srb->sc_data_direction == DMA_BIDIRECTIONAL) { usb_stor_dbg(us, "UNKNOWN data direction\n"); srb->result = DID_ERROR << 16; } /* * reject if target != 0 or if LUN is higher than * the maximum known LUN */ else if (srb->device->id && !(us->fflags & US_FL_SCM_MULT_TARG)) { usb_stor_dbg(us, "Bad target number (%d:%llu)\n", srb->device->id, srb->device->lun); srb->result = DID_BAD_TARGET << 16; } else if (srb->device->lun > us->max_lun) { usb_stor_dbg(us, "Bad LUN (%d:%llu)\n", srb->device->id, srb->device->lun); srb->result = DID_BAD_TARGET << 16; } /* * Handle those devices which need us to fake * their inquiry data */ else if ((srb->cmnd[0] == INQUIRY) && (us->fflags & US_FL_FIX_INQUIRY)) { unsigned char data_ptr[36] = { 0x00, 0x80, 0x02, 0x02, 0x1F, 0x00, 0x00, 0x00}; usb_stor_dbg(us, "Faking INQUIRY command\n"); fill_inquiry_response(us, data_ptr, 36); srb->result = SAM_STAT_GOOD; } /* we've got a command, let's do it! */ else { US_DEBUG(usb_stor_show_command(us, srb)); us->proto_handler(srb, us); usb_mark_last_busy(us->pusb_dev); } /* lock access to the state */ scsi_lock(host); /* was the command aborted? */ if (srb->result == DID_ABORT << 16) { SkipForAbort: usb_stor_dbg(us, "scsi command aborted\n"); srb = NULL; /* Don't call scsi_done() */ } /* * If an abort request was received we need to signal that * the abort has finished. The proper test for this is * the TIMED_OUT flag, not srb->result == DID_ABORT, because * the timeout might have occurred after the command had * already completed with a different result code. 
*/ if (test_bit(US_FLIDX_TIMED_OUT, &us->dflags)) { complete(&(us->notify)); /* Allow USB transfers to resume */ clear_bit(US_FLIDX_ABORTING, &us->dflags); clear_bit(US_FLIDX_TIMED_OUT, &us->dflags); } /* finished working on this command */ us->srb = NULL; scsi_unlock(host); /* unlock the device pointers */ mutex_unlock(&us->dev_mutex); /* now that the locks are released, notify the SCSI core */ if (srb) { usb_stor_dbg(us, "scsi cmd done, result=0x%x\n", srb->result); scsi_done_direct(srb); } } /* for (;;) */ /* Wait until we are told to stop */ for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) break; schedule(); } __set_current_state(TASK_RUNNING); return 0; } /*********************************************************************** * Device probing and disconnecting ***********************************************************************/ /* Associate our private data with the USB device */ static int associate_dev(struct us_data *us, struct usb_interface *intf) { /* Fill in the device-related fields */ us->pusb_dev = interface_to_usbdev(intf); us->pusb_intf = intf; us->ifnum = intf->cur_altsetting->desc.bInterfaceNumber; usb_stor_dbg(us, "Vendor: 0x%04x, Product: 0x%04x, Revision: 0x%04x\n", le16_to_cpu(us->pusb_dev->descriptor.idVendor), le16_to_cpu(us->pusb_dev->descriptor.idProduct), le16_to_cpu(us->pusb_dev->descriptor.bcdDevice)); usb_stor_dbg(us, "Interface Subclass: 0x%02x, Protocol: 0x%02x\n", intf->cur_altsetting->desc.bInterfaceSubClass, intf->cur_altsetting->desc.bInterfaceProtocol); /* Store our private data in the interface */ usb_set_intfdata(intf, us); /* Allocate the control/setup and DMA-mapped buffers */ us->cr = kmalloc(sizeof(*us->cr), GFP_KERNEL); if (!us->cr) return -ENOMEM; us->iobuf = usb_alloc_coherent(us->pusb_dev, US_IOBUF_SIZE, GFP_KERNEL, &us->iobuf_dma); if (!us->iobuf) { usb_stor_dbg(us, "I/O buffer allocation failed\n"); return -ENOMEM; } return 0; } /* Works only for digits and letters, but small and fast */ #define TOLOWER(x) ((x) | 0x20) /* Adjust device flags based on the "quirks=" module parameter */ void usb_stor_adjust_quirks(struct usb_device *udev, u64 *fflags) { char *p; u16 vid = le16_to_cpu(udev->descriptor.idVendor); u16 pid = le16_to_cpu(udev->descriptor.idProduct); u64 f = 0; u64 mask = (US_FL_SANE_SENSE | US_FL_BAD_SENSE | US_FL_FIX_CAPACITY | US_FL_IGNORE_UAS | US_FL_CAPACITY_HEURISTICS | US_FL_IGNORE_DEVICE | US_FL_NOT_LOCKABLE | US_FL_MAX_SECTORS_64 | US_FL_CAPACITY_OK | US_FL_IGNORE_RESIDUE | US_FL_SINGLE_LUN | US_FL_NO_WP_DETECT | US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 | US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE | US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES | US_FL_MAX_SECTORS_240 | US_FL_NO_REPORT_LUNS | US_FL_ALWAYS_SYNC); p = quirks; while (*p) { /* Each entry consists of VID:PID:flags */ if (vid == simple_strtoul(p, &p, 16) && *p == ':' && pid == simple_strtoul(p+1, &p, 16) && *p == ':') break; /* Move forward to the next entry */ while (*p) { if (*p++ == ',') break; } } if (!*p) /* No match */ return; /* Collect the flags */ while (*++p && *p != ',') { switch (TOLOWER(*p)) { case 'a': f |= US_FL_SANE_SENSE; break; case 'b': f |= US_FL_BAD_SENSE; break; case 'c': f |= US_FL_FIX_CAPACITY; break; case 'd': f |= US_FL_NO_READ_DISC_INFO; break; case 'e': f |= US_FL_NO_READ_CAPACITY_16; break; case 'f': f |= US_FL_NO_REPORT_OPCODES; break; case 'g': f |= US_FL_MAX_SECTORS_240; break; case 'h': f |= US_FL_CAPACITY_HEURISTICS; break; case 'i': f |= US_FL_IGNORE_DEVICE; break; case 'j': f |= 
US_FL_NO_REPORT_LUNS; break; case 'k': f |= US_FL_NO_SAME; break; case 'l': f |= US_FL_NOT_LOCKABLE; break; case 'm': f |= US_FL_MAX_SECTORS_64; break; case 'n': f |= US_FL_INITIAL_READ10; break; case 'o': f |= US_FL_CAPACITY_OK; break; case 'p': f |= US_FL_WRITE_CACHE; break; case 'r': f |= US_FL_IGNORE_RESIDUE; break; case 's': f |= US_FL_SINGLE_LUN; break; case 't': f |= US_FL_NO_ATA_1X; break; case 'u': f |= US_FL_IGNORE_UAS; break; case 'w': f |= US_FL_NO_WP_DETECT; break; case 'y': f |= US_FL_ALWAYS_SYNC; break; /* Ignore unrecognized flag characters */ } } *fflags = (*fflags & ~mask) | f; } EXPORT_SYMBOL_GPL(usb_stor_adjust_quirks); /* Get the unusual_devs entries and the string descriptors */ static int get_device_info(struct us_data *us, const struct usb_device_id *id, const struct us_unusual_dev *unusual_dev) { struct usb_device *dev = us->pusb_dev; struct usb_interface_descriptor *idesc = &us->pusb_intf->cur_altsetting->desc; struct device *pdev = &us->pusb_intf->dev; /* Store the entries */ us->unusual_dev = unusual_dev; us->subclass = (unusual_dev->useProtocol == USB_SC_DEVICE) ? idesc->bInterfaceSubClass : unusual_dev->useProtocol; us->protocol = (unusual_dev->useTransport == USB_PR_DEVICE) ? idesc->bInterfaceProtocol : unusual_dev->useTransport; us->fflags = id->driver_info; usb_stor_adjust_quirks(us->pusb_dev, &us->fflags); if (us->fflags & US_FL_IGNORE_DEVICE) { dev_info(pdev, "device ignored\n"); return -ENODEV; } /* * This flag is only needed when we're in high-speed, so let's * disable it if we're in full-speed */ if (dev->speed != USB_SPEED_HIGH) us->fflags &= ~US_FL_GO_SLOW; if (us->fflags) dev_info(pdev, "Quirks match for vid %04x pid %04x: %llx\n", le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct), us->fflags); /* * Log a message if a non-generic unusual_dev entry contains an * unnecessary subclass or protocol override. This may stimulate * reports from users that will help us remove unneeded entries * from the unusual_devs.h table. 
*/ if (id->idVendor || id->idProduct) { static const char *msgs[3] = { "an unneeded SubClass entry", "an unneeded Protocol entry", "unneeded SubClass and Protocol entries"}; struct usb_device_descriptor *ddesc = &dev->descriptor; int msg = -1; if (unusual_dev->useProtocol != USB_SC_DEVICE && us->subclass == idesc->bInterfaceSubClass) msg += 1; if (unusual_dev->useTransport != USB_PR_DEVICE && us->protocol == idesc->bInterfaceProtocol) msg += 2; if (msg >= 0 && !(us->fflags & US_FL_NEED_OVERRIDE)) dev_notice(pdev, "This device " "(%04x,%04x,%04x S %02x P %02x)" " has %s in unusual_devs.h (kernel" " %s)\n" " Please send a copy of this message to " "<linux-usb@vger.kernel.org> and " "<usb-storage@lists.one-eyed-alien.net>\n", le16_to_cpu(ddesc->idVendor), le16_to_cpu(ddesc->idProduct), le16_to_cpu(ddesc->bcdDevice), idesc->bInterfaceSubClass, idesc->bInterfaceProtocol, msgs[msg], utsname()->release); } return 0; } /* Get the transport settings */ static void get_transport(struct us_data *us) { switch (us->protocol) { case USB_PR_CB: us->transport_name = "Control/Bulk"; us->transport = usb_stor_CB_transport; us->transport_reset = usb_stor_CB_reset; us->max_lun = 7; break; case USB_PR_CBI: us->transport_name = "Control/Bulk/Interrupt"; us->transport = usb_stor_CB_transport; us->transport_reset = usb_stor_CB_reset; us->max_lun = 7; break; case USB_PR_BULK: us->transport_name = "Bulk"; us->transport = usb_stor_Bulk_transport; us->transport_reset = usb_stor_Bulk_reset; break; } } /* Get the protocol settings */ static void get_protocol(struct us_data *us) { switch (us->subclass) { case USB_SC_RBC: us->protocol_name = "Reduced Block Commands (RBC)"; us->proto_handler = usb_stor_transparent_scsi_command; break; case USB_SC_8020: us->protocol_name = "8020i"; us->proto_handler = usb_stor_pad12_command; us->max_lun = 0; break; case USB_SC_QIC: us->protocol_name = "QIC-157"; us->proto_handler = usb_stor_pad12_command; us->max_lun = 0; break; case USB_SC_8070: us->protocol_name = "8070i"; us->proto_handler = usb_stor_pad12_command; us->max_lun = 0; break; case USB_SC_SCSI: us->protocol_name = "Transparent SCSI"; us->proto_handler = usb_stor_transparent_scsi_command; break; case USB_SC_UFI: us->protocol_name = "Uniform Floppy Interface (UFI)"; us->proto_handler = usb_stor_ufi_command; break; } } /* Get the pipe settings */ static int get_pipes(struct us_data *us) { struct usb_host_interface *alt = us->pusb_intf->cur_altsetting; struct usb_endpoint_descriptor *ep_in; struct usb_endpoint_descriptor *ep_out; struct usb_endpoint_descriptor *ep_int; int res; /* * Find the first endpoint of each type we need. * We are expecting a minimum of 2 endpoints - in and out (bulk). * An optional interrupt-in is OK (necessary for CBI protocol). * We will ignore any others. 
*/ res = usb_find_common_endpoints(alt, &ep_in, &ep_out, NULL, NULL); if (res) { usb_stor_dbg(us, "bulk endpoints not found\n"); return res; } res = usb_find_int_in_endpoint(alt, &ep_int); if (res && us->protocol == USB_PR_CBI) { usb_stor_dbg(us, "interrupt endpoint not found\n"); return res; } /* Calculate and store the pipe values */ us->send_ctrl_pipe = usb_sndctrlpipe(us->pusb_dev, 0); us->recv_ctrl_pipe = usb_rcvctrlpipe(us->pusb_dev, 0); us->send_bulk_pipe = usb_sndbulkpipe(us->pusb_dev, usb_endpoint_num(ep_out)); us->recv_bulk_pipe = usb_rcvbulkpipe(us->pusb_dev, usb_endpoint_num(ep_in)); if (ep_int) { us->recv_intr_pipe = usb_rcvintpipe(us->pusb_dev, usb_endpoint_num(ep_int)); us->ep_bInterval = ep_int->bInterval; } return 0; } /* Initialize all the dynamic resources we need */ static int usb_stor_acquire_resources(struct us_data *us) { int p; struct task_struct *th; us->current_urb = usb_alloc_urb(0, GFP_KERNEL); if (!us->current_urb) return -ENOMEM; /* * Just before we start our control thread, initialize * the device if it needs initialization */ if (us->unusual_dev->initFunction) { p = us->unusual_dev->initFunction(us); if (p) return p; } /* Start up our control thread */ th = kthread_run(usb_stor_control_thread, us, "usb-storage"); if (IS_ERR(th)) { dev_warn(&us->pusb_intf->dev, "Unable to start control thread\n"); return PTR_ERR(th); } us->ctl_thread = th; return 0; } /* Release all our dynamic resources */ static void usb_stor_release_resources(struct us_data *us) { /* * Tell the control thread to exit. The SCSI host must * already have been removed and the DISCONNECTING flag set * so that we won't accept any more commands. */ usb_stor_dbg(us, "-- sending exit command to thread\n"); complete(&us->cmnd_ready); if (us->ctl_thread) kthread_stop(us->ctl_thread); /* Call the destructor routine, if it exists */ if (us->extra_destructor) { usb_stor_dbg(us, "-- calling extra_destructor()\n"); us->extra_destructor(us->extra); } /* Free the extra data and the URB */ kfree(us->extra); usb_free_urb(us->current_urb); } /* Dissociate from the USB device */ static void dissociate_dev(struct us_data *us) { /* Free the buffers */ kfree(us->cr); usb_free_coherent(us->pusb_dev, US_IOBUF_SIZE, us->iobuf, us->iobuf_dma); /* Remove our private data from the interface */ usb_set_intfdata(us->pusb_intf, NULL); } /* * First stage of disconnect processing: stop SCSI scanning, * remove the host, and stop accepting new commands */ static void quiesce_and_remove_host(struct us_data *us) { struct Scsi_Host *host = us_to_host(us); /* If the device is really gone, cut short reset delays */ if (us->pusb_dev->state == USB_STATE_NOTATTACHED) { set_bit(US_FLIDX_DISCONNECTING, &us->dflags); wake_up(&us->delay_wait); } /* * Prevent SCSI scanning (if it hasn't started yet) * or wait for the SCSI-scanning routine to stop. */ cancel_delayed_work_sync(&us->scan_dwork); /* Balance autopm calls if scanning was cancelled */ if (test_bit(US_FLIDX_SCAN_PENDING, &us->dflags)) usb_autopm_put_interface_no_suspend(us->pusb_intf); /* * Removing the host will perform an orderly shutdown: caches * synchronized, disks spun down, etc. */ scsi_remove_host(host); /* * Prevent any new commands from being accepted and cut short * reset delays. 
*/ scsi_lock(host); set_bit(US_FLIDX_DISCONNECTING, &us->dflags); scsi_unlock(host); wake_up(&us->delay_wait); } /* Second stage of disconnect processing: deallocate all resources */ static void release_everything(struct us_data *us) { usb_stor_release_resources(us); dissociate_dev(us); /* * Drop our reference to the host; the SCSI core will free it * (and "us" along with it) when the refcount becomes 0. */ scsi_host_put(us_to_host(us)); } /* Delayed-work routine to carry out SCSI-device scanning */ static void usb_stor_scan_dwork(struct work_struct *work) { struct us_data *us = container_of(work, struct us_data, scan_dwork.work); struct device *dev = &us->pusb_intf->dev; dev_dbg(dev, "starting scan\n"); /* For bulk-only devices, determine the max LUN value */ if (us->protocol == USB_PR_BULK && !(us->fflags & US_FL_SINGLE_LUN) && !(us->fflags & US_FL_SCM_MULT_TARG)) { mutex_lock(&us->dev_mutex); us->max_lun = usb_stor_Bulk_max_lun(us); /* * Allow proper scanning of devices that present more than 8 LUNs * While not affecting other devices that may need the previous * behavior */ if (us->max_lun >= 8) us_to_host(us)->max_lun = us->max_lun+1; mutex_unlock(&us->dev_mutex); } scsi_scan_host(us_to_host(us)); dev_dbg(dev, "scan complete\n"); /* Should we unbind if no devices were detected? */ usb_autopm_put_interface(us->pusb_intf); clear_bit(US_FLIDX_SCAN_PENDING, &us->dflags); } static unsigned int usb_stor_sg_tablesize(struct usb_interface *intf) { struct usb_device *usb_dev = interface_to_usbdev(intf); if (usb_dev->bus->sg_tablesize) { return usb_dev->bus->sg_tablesize; } return SG_ALL; } /* First part of general USB mass-storage probing */ int usb_stor_probe1(struct us_data **pus, struct usb_interface *intf, const struct usb_device_id *id, const struct us_unusual_dev *unusual_dev, const struct scsi_host_template *sht) { struct Scsi_Host *host; struct us_data *us; int result; dev_info(&intf->dev, "USB Mass Storage device detected\n"); /* * Ask the SCSI layer to allocate a host structure, with extra * space at the end for our private us_data structure. */ host = scsi_host_alloc(sht, sizeof(*us)); if (!host) { dev_warn(&intf->dev, "Unable to allocate the scsi host\n"); return -ENOMEM; } /* * Allow 16-byte CDBs and thus > 2TB */ host->max_cmd_len = 16; host->sg_tablesize = usb_stor_sg_tablesize(intf); *pus = us = host_to_us(host); mutex_init(&(us->dev_mutex)); us_set_lock_class(&us->dev_mutex, intf); init_completion(&us->cmnd_ready); init_completion(&(us->notify)); init_waitqueue_head(&us->delay_wait); INIT_DELAYED_WORK(&us->scan_dwork, usb_stor_scan_dwork); /* Associate the us_data structure with the USB device */ result = associate_dev(us, intf); if (result) goto BadDevice; /* * Some USB host controllers can't do DMA: They have to use PIO, or they * have to use a small dedicated local memory area, or they have other * restrictions on addressable memory. * * We can't support these controllers on highmem systems as we don't * kmap or bounce buffer. */ if (IS_ENABLED(CONFIG_HIGHMEM) && (!hcd_uses_dma(bus_to_hcd(us->pusb_dev->bus)) || bus_to_hcd(us->pusb_dev->bus)->localmem_pool)) { dev_warn(&intf->dev, "USB Mass Storage not supported on this host controller\n"); result = -EINVAL; goto release; } /* Get the unusual_devs entries and the descriptors */ result = get_device_info(us, id, unusual_dev); if (result) goto BadDevice; /* Get standard transport and protocol settings */ get_transport(us); get_protocol(us); /* * Give the caller a chance to fill in specialized transport * or protocol settings. 
*/ return 0; BadDevice: usb_stor_dbg(us, "storage_probe() failed\n"); release: release_everything(us); return result; } EXPORT_SYMBOL_GPL(usb_stor_probe1); /* Second part of general USB mass-storage probing */ int usb_stor_probe2(struct us_data *us) { int result; struct device *dev = &us->pusb_intf->dev; /* Make sure the transport and protocol have both been set */ if (!us->transport || !us->proto_handler) { result = -ENXIO; goto BadDevice; } usb_stor_dbg(us, "Transport: %s\n", us->transport_name); usb_stor_dbg(us, "Protocol: %s\n", us->protocol_name); if (us->fflags & US_FL_SCM_MULT_TARG) { /* * SCM eUSCSI bridge devices can have different numbers * of LUNs on different targets; allow all to be probed. */ us->max_lun = 7; /* The eUSCSI itself has ID 7, so avoid scanning that */ us_to_host(us)->this_id = 7; /* max_id is 8 initially, so no need to set it here */ } else { /* In the normal case there is only a single target */ us_to_host(us)->max_id = 1; /* * Like Windows, we won't store the LUN bits in CDB[1] for * SCSI-2 devices using the Bulk-Only transport (even though * this violates the SCSI spec). */ if (us->transport == usb_stor_Bulk_transport) us_to_host(us)->no_scsi2_lun_in_cdb = 1; } /* fix for single-lun devices */ if (us->fflags & US_FL_SINGLE_LUN) us->max_lun = 0; /* Find the endpoints and calculate pipe values */ result = get_pipes(us); if (result) goto BadDevice; /* * If the device returns invalid data for the first READ(10) * command, indicate the command should be retried. */ if (us->fflags & US_FL_INITIAL_READ10) set_bit(US_FLIDX_REDO_READ10, &us->dflags); /* Acquire all the other resources and add the host */ result = usb_stor_acquire_resources(us); if (result) goto BadDevice; usb_autopm_get_interface_no_resume(us->pusb_intf); snprintf(us->scsi_name, sizeof(us->scsi_name), "usb-storage %s", dev_name(&us->pusb_intf->dev)); result = scsi_add_host(us_to_host(us), dev); if (result) { dev_warn(dev, "Unable to add the scsi host\n"); goto HostAddErr; } /* Submit the delayed_work for SCSI-device scanning */ set_bit(US_FLIDX_SCAN_PENDING, &us->dflags); if (delay_use > 0) dev_dbg(dev, "waiting for device to settle before scanning\n"); queue_delayed_work(system_freezable_wq, &us->scan_dwork, msecs_to_jiffies(delay_use)); return 0; /* We come here if there are any problems */ HostAddErr: usb_autopm_put_interface_no_suspend(us->pusb_intf); BadDevice: usb_stor_dbg(us, "storage_probe() failed\n"); release_everything(us); return result; } EXPORT_SYMBOL_GPL(usb_stor_probe2); /* Handle a USB mass-storage disconnect */ void usb_stor_disconnect(struct usb_interface *intf) { struct us_data *us = usb_get_intfdata(intf); quiesce_and_remove_host(us); release_everything(us); } EXPORT_SYMBOL_GPL(usb_stor_disconnect); static struct scsi_host_template usb_stor_host_template; /* The main probe routine for standard devices */ static int storage_probe(struct usb_interface *intf, const struct usb_device_id *id) { const struct us_unusual_dev *unusual_dev; struct us_data *us; int result; int size; /* If uas is enabled and this device can do uas then ignore it. */ #if IS_ENABLED(CONFIG_USB_UAS) if (uas_use_uas_driver(intf, id, NULL)) return -ENXIO; #endif /* * If the device isn't standard (is handled by a subdriver * module) then don't accept it. */ if (usb_usual_ignore_device(intf)) return -ENXIO; /* * Call the general probe procedures. * * The unusual_dev_list array is parallel to the usb_storage_usb_ids * table, so we use the index of the id entry to find the * corresponding unusual_devs entry. 
 */
	size = ARRAY_SIZE(us_unusual_dev_list);
	if (id >= usb_storage_usb_ids && id < usb_storage_usb_ids + size) {
		unusual_dev = (id - usb_storage_usb_ids) + us_unusual_dev_list;
	} else {
		unusual_dev = &for_dynamic_ids;
		dev_dbg(&intf->dev, "Use Bulk-Only transport with the Transparent SCSI protocol for dynamic id: 0x%04x 0x%04x\n",
			id->idVendor, id->idProduct);
	}

	result = usb_stor_probe1(&us, intf, id, unusual_dev,
				 &usb_stor_host_template);
	if (result)
		return result;

	/* No special transport or protocol settings in the main module */
	result = usb_stor_probe2(us);
	return result;
}

static struct usb_driver usb_storage_driver = {
	.name =		DRV_NAME,
	.probe =	storage_probe,
	.disconnect =	usb_stor_disconnect,
	.suspend =	usb_stor_suspend,
	.resume =	usb_stor_resume,
	.reset_resume =	usb_stor_reset_resume,
	.pre_reset =	usb_stor_pre_reset,
	.post_reset =	usb_stor_post_reset,
	.id_table =	usb_storage_usb_ids,
	.supports_autosuspend = 1,
	.soft_unbind =	1,
};

module_usb_stor_driver(usb_storage_driver, usb_stor_host_template, DRV_NAME);
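Since the "quirks=" grammar only appears implicitly in the parsing loop of usb_stor_adjust_quirks() above, a standalone illustration may help. This is a minimal userspace sketch of the same comma-separated VID:PID:flag-letter format; the device IDs shown and the reduced three-flag set are arbitrary examples (the driver's real table maps the full 'a'..'y' range to US_FL_* bits):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-ins for a few US_FL_* bits */
#define FL_SANE_SENSE   (1ULL << 0)	/* 'a' */
#define FL_BAD_SENSE    (1ULL << 1)	/* 'b' */
#define FL_FIX_CAPACITY (1ULL << 2)	/* 'c' */

static unsigned long long parse_quirks(const char *quirks,
				       unsigned int vid, unsigned int pid)
{
	const char *p = quirks;
	unsigned long long f = 0;
	char *end;

	for (;;) {
		/* Each entry has the form VID:PID:flags */
		unsigned long v = strtoul(p, &end, 16);

		if (v == vid && *end == ':') {
			unsigned long pr = strtoul(end + 1, &end, 16);

			if (pr == pid && *end == ':') {
				p = end + 1;	/* start of flag letters */
				break;
			}
		}
		/* Move forward to the next comma-separated entry */
		p = strchr(p, ',');
		if (!p)
			return 0;	/* no entry matched this device */
		p++;
	}

	/* Collect one flag per letter until the entry (or string) ends */
	for (; *p && *p != ','; p++) {
		switch (*p | 0x20) {	/* same TOLOWER trick as the driver */
		case 'a': f |= FL_SANE_SENSE;   break;
		case 'b': f |= FL_BAD_SENSE;    break;
		case 'c': f |= FL_FIX_CAPACITY; break;
		default: break;		/* unrecognized letters are ignored */
		}
	}
	return f;
}

int main(void)
{
	/* hypothetical module parameter value */
	const char *quirks = "abcd:1234:ac,0421:0433:b";

	printf("flags=%#llx\n", parse_quirks(quirks, 0x0421, 0x0433));
	return 0;
}

Run against the sample string, the second entry matches and the output is flags=0x2; an unmatched device falls through with no flags set, matching the driver's "no match" early return.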
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_GENERIC_NETLINK_H
#define __NET_GENERIC_NETLINK_H

#include <linux/net.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <uapi/linux/genetlink.h>

#define GENLMSG_DEFAULT_SIZE (NLMSG_DEFAULT_SIZE - GENL_HDRLEN)

/* Non-parallel generic netlink requests are serialized by a global lock.
 */
void genl_lock(void);
void genl_unlock(void);

#define MODULE_ALIAS_GENL_FAMILY(family) \
	MODULE_ALIAS_NET_PF_PROTO_NAME(PF_NETLINK, NETLINK_GENERIC, "-family-" family)

/* Binding to multicast group requires %CAP_NET_ADMIN */
#define GENL_MCAST_CAP_NET_ADMIN	BIT(0)
/* Binding to multicast group requires %CAP_SYS_ADMIN */
#define GENL_MCAST_CAP_SYS_ADMIN	BIT(1)

/**
 * struct genl_multicast_group - generic netlink multicast group
 * @name: name of the multicast group, names are per-family
 * @flags: GENL_MCAST_* flags
 */
struct genl_multicast_group {
	char name[GENL_NAMSIZ];
	u8 flags;
};

struct genl_split_ops;
struct genl_info;

/**
 * struct genl_family - generic netlink family
 * @hdrsize: length of user specific header in bytes
 * @name: name of family
 * @version: protocol version
 * @maxattr: maximum number of attributes supported
 * @policy: netlink policy
 * @netnsok: set to true if the family can handle network
 *	namespaces and should be presented in all of them
 * @parallel_ops: operations can be called in parallel and aren't
 *	synchronized by the core genetlink code
 * @pre_doit: called before an operation's doit callback, it may
 *	do additional, common, filtering and return an error
 * @post_doit: called after an operation's doit callback, it may
 *	undo operations done by pre_doit, for example release locks
 * @bind: called when a family multicast group is added to a netlink socket
 * @unbind: called when a family multicast group is removed from a netlink socket
 * @module: pointer to the owning module (set to THIS_MODULE)
 * @mcgrps: multicast groups used by this family
 * @n_mcgrps: number of multicast groups
 * @resv_start_op: first operation for which reserved fields of the header
 *	can be validated and policies are required (see below);
 *	new families should leave this field at zero
 * @ops: the operations supported by this family
 * @n_ops: number of operations supported by this family
 * @small_ops: the small-struct operations supported by this family
 * @n_small_ops: number of small-struct operations supported by this family
 * @split_ops: the split do/dump form of operation definition
 * @n_split_ops: number of entries in @split_ops; note that with split do/dump
 *	ops the number of entries is not the same as the number of commands
 * @sock_priv_size: the size of per-socket private memory
 * @sock_priv_init: the per-socket private memory initializer
 * @sock_priv_destroy: the per-socket private memory destructor
 *
 * Attribute policies (the combination of @policy and @maxattr fields)
 * can be attached at the family level or at the operation level.
 * If both are present the per-operation policy takes precedence.
 * For operations before @resv_start_op, lack of policy means that the core
 * will perform no attribute parsing or validation. For newer operations,
 * if a policy is not provided, the core will reject all TLV attributes.
*/ struct genl_family { unsigned int hdrsize; char name[GENL_NAMSIZ]; unsigned int version; unsigned int maxattr; u8 netnsok:1; u8 parallel_ops:1; u8 n_ops; u8 n_small_ops; u8 n_split_ops; u8 n_mcgrps; u8 resv_start_op; const struct nla_policy *policy; int (*pre_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); void (*post_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); int (*bind)(int mcgrp); void (*unbind)(int mcgrp); const struct genl_ops * ops; const struct genl_small_ops *small_ops; const struct genl_split_ops *split_ops; const struct genl_multicast_group *mcgrps; struct module *module; size_t sock_priv_size; void (*sock_priv_init)(void *priv); void (*sock_priv_destroy)(void *priv); /* private: internal use only */ /* protocol family identifier */ int id; /* starting number of multicast group IDs in this family */ unsigned int mcgrp_offset; /* list of per-socket privs */ struct xarray *sock_privs; }; /** * struct genl_info - receiving information * @snd_seq: sending sequence number * @snd_portid: netlink portid of sender * @family: generic netlink family * @nlhdr: netlink message header * @genlhdr: generic netlink message header * @attrs: netlink attributes * @_net: network namespace * @ctx: storage space for the use by the family * @user_ptr: user pointers (deprecated, use ctx instead) * @extack: extended ACK report struct */ struct genl_info { u32 snd_seq; u32 snd_portid; const struct genl_family *family; const struct nlmsghdr * nlhdr; struct genlmsghdr * genlhdr; struct nlattr ** attrs; possible_net_t _net; union { u8 ctx[NETLINK_CTX_SIZE]; void * user_ptr[2]; }; struct netlink_ext_ack *extack; }; static inline struct net *genl_info_net(const struct genl_info *info) { return read_pnet(&info->_net); } static inline void genl_info_net_set(struct genl_info *info, struct net *net) { write_pnet(&info->_net, net); } static inline void *genl_info_userhdr(const struct genl_info *info) { return (u8 *)info->genlhdr + GENL_HDRLEN; } #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg) #define GENL_SET_ERR_MSG_FMT(info, msg, args...) \ NL_SET_ERR_MSG_FMT((info)->extack, msg, ##args) /* Report that a root attribute is missing */ #define GENL_REQ_ATTR_CHECK(info, attr) ({ \ const struct genl_info *__info = (info); \ \ NL_REQ_ATTR_CHECK(__info->extack, NULL, __info->attrs, (attr)); \ }) enum genl_validate_flags { GENL_DONT_VALIDATE_STRICT = BIT(0), GENL_DONT_VALIDATE_DUMP = BIT(1), GENL_DONT_VALIDATE_DUMP_STRICT = BIT(2), }; /** * struct genl_small_ops - generic netlink operations (small version) * @cmd: command identifier * @internal_flags: flags used by the family * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) * @validate: validation flags from enum genl_validate_flags * @doit: standard command callback * @dumpit: callback for dumpers * * This is a cut-down version of struct genl_ops for users who don't need * most of the ancillary infra and want to save space. 
*/ struct genl_small_ops { int (*doit)(struct sk_buff *skb, struct genl_info *info); int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); u8 cmd; u8 internal_flags; u8 flags; u8 validate; }; /** * struct genl_ops - generic netlink operations * @cmd: command identifier * @internal_flags: flags used by the family * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) * @maxattr: maximum number of attributes supported * @policy: netlink policy (takes precedence over family policy) * @validate: validation flags from enum genl_validate_flags * @doit: standard command callback * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps */ struct genl_ops { int (*doit)(struct sk_buff *skb, struct genl_info *info); int (*start)(struct netlink_callback *cb); int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); const struct nla_policy *policy; unsigned int maxattr; u8 cmd; u8 internal_flags; u8 flags; u8 validate; }; /** * struct genl_split_ops - generic netlink operations (do/dump split version) * @cmd: command identifier * @internal_flags: flags used by the family * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) * @validate: validation flags from enum genl_validate_flags * @policy: netlink policy (takes precedence over family policy) * @maxattr: maximum number of attributes supported * * Do callbacks: * @pre_doit: called before an operation's @doit callback, it may * do additional, common, filtering and return an error * @doit: standard command callback * @post_doit: called after an operation's @doit callback, it may * undo operations done by pre_doit, for example release locks * * Dump callbacks: * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps * * Do callbacks can be used if %GENL_CMD_CAP_DO is set in @flags. * Dump callbacks can be used if %GENL_CMD_CAP_DUMP is set in @flags. * Exactly one of those flags must be set. */ struct genl_split_ops { union { struct { int (*pre_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); int (*doit)(struct sk_buff *skb, struct genl_info *info); void (*post_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); }; struct { int (*start)(struct netlink_callback *cb); int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); }; }; const struct nla_policy *policy; unsigned int maxattr; u8 cmd; u8 internal_flags; u8 flags; u8 validate; }; /** * struct genl_dumpit_info - info that is available during dumpit op call * @op: generic netlink ops - for internal genl code usage * @attrs: netlink attributes * @info: struct genl_info describing the request */ struct genl_dumpit_info { struct genl_split_ops op; struct genl_info info; }; static inline const struct genl_dumpit_info * genl_dumpit_info(struct netlink_callback *cb) { return cb->data; } static inline const struct genl_info * genl_info_dump(struct netlink_callback *cb) { return &genl_dumpit_info(cb)->info; } /** * genl_info_init_ntf() - initialize genl_info for notifications * @info: genl_info struct to set up * @family: pointer to the genetlink family * @cmd: command to be used in the notification * * Initialize a locally declared struct genl_info to pass to various APIs. * Intended to be used when creating notifications. 
*/ static inline void genl_info_init_ntf(struct genl_info *info, const struct genl_family *family, u8 cmd) { struct genlmsghdr *hdr = (void *) &info->user_ptr[0]; memset(info, 0, sizeof(*info)); info->family = family; info->genlhdr = hdr; hdr->cmd = cmd; } static inline bool genl_info_is_ntf(const struct genl_info *info) { return !info->nlhdr; } void *__genl_sk_priv_get(struct genl_family *family, struct sock *sk); void *genl_sk_priv_get(struct genl_family *family, struct sock *sk); int genl_register_family(struct genl_family *family); int genl_unregister_family(const struct genl_family *family); void genl_notify(const struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags); void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, const struct genl_family *family, int flags, u8 cmd); static inline void * __genlmsg_iput(struct sk_buff *skb, const struct genl_info *info, int flags) { return genlmsg_put(skb, info->snd_portid, info->snd_seq, info->family, flags, info->genlhdr->cmd); } /** * genlmsg_iput - start genetlink message based on genl_info * @skb: skb in which message header will be placed * @info: genl_info as provided to do/dump handlers * * Convenience wrapper which starts a genetlink message based on * information in user request. @info should be either the struct passed * by genetlink core to do/dump handlers (when constructing replies to * such requests) or a struct initialized by genl_info_init_ntf() * when constructing notifications. * * Returns: pointer to new genetlink header. */ static inline void * genlmsg_iput(struct sk_buff *skb, const struct genl_info *info) { return __genlmsg_iput(skb, info, 0); } /** * genlmsg_nlhdr - Obtain netlink header from user specified header * @user_hdr: user header as returned from genlmsg_put() * * Returns: pointer to netlink header. */ static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr) { return (struct nlmsghdr *)((char *)user_hdr - GENL_HDRLEN - NLMSG_HDRLEN); } /** * genlmsg_parse_deprecated - parse attributes of a genetlink message * @nlh: netlink message header * @family: genetlink message family * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct */ static inline int genlmsg_parse_deprecated(const struct nlmsghdr *nlh, const struct genl_family *family, struct nlattr *tb[], int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, policy, NL_VALIDATE_LIBERAL, extack); } /** * genlmsg_parse - parse attributes of a genetlink message * @nlh: netlink message header * @family: genetlink message family * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct */ static inline int genlmsg_parse(const struct nlmsghdr *nlh, const struct genl_family *family, struct nlattr *tb[], int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, policy, NL_VALIDATE_STRICT, extack); } /** * genl_dump_check_consistent - check if sequence is consistent and advertise if not * @cb: netlink callback structure that stores the sequence number * @user_hdr: user header as returned from genlmsg_put() * * Cf. nl_dump_check_consistent(), this just provides a wrapper to make it * simpler to use with generic netlink. 
*/ static inline void genl_dump_check_consistent(struct netlink_callback *cb, void *user_hdr) { nl_dump_check_consistent(cb, genlmsg_nlhdr(user_hdr)); } /** * genlmsg_put_reply - Add generic netlink header to a reply message * @skb: socket buffer holding the message * @info: receiver info * @family: generic netlink family * @flags: netlink message flags * @cmd: generic netlink command * * Returns: pointer to user specific header */ static inline void *genlmsg_put_reply(struct sk_buff *skb, struct genl_info *info, const struct genl_family *family, int flags, u8 cmd) { return genlmsg_put(skb, info->snd_portid, info->snd_seq, family, flags, cmd); } /** * genlmsg_end - Finalize a generic netlink message * @skb: socket buffer the message is stored in * @hdr: user specific header */ static inline void genlmsg_end(struct sk_buff *skb, void *hdr) { nlmsg_end(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); } /** * genlmsg_cancel - Cancel construction of a generic netlink message * @skb: socket buffer the message is stored in * @hdr: generic netlink message header */ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr) { if (hdr) nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); } /** * genlmsg_multicast_netns_filtered - multicast a netlink message * to a specific netns with filter * function * @family: the generic netlink family * @net: the net namespace * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: offset of multicast group in groups array * @flags: allocation flags * @filter: filter function * @filter_data: filter function private data * * Return: 0 on success, negative error code for failure. */ static inline int genlmsg_multicast_netns_filtered(const struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags, netlink_filter_fn filter, void *filter_data) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return nlmsg_multicast_filtered(net->genl_sock, skb, portid, group, flags, filter, filter_data); } /** * genlmsg_multicast_netns - multicast a netlink message to a specific netns * @family: the generic netlink family * @net: the net namespace * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: offset of multicast group in groups array * @flags: allocation flags */ static inline int genlmsg_multicast_netns(const struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { return genlmsg_multicast_netns_filtered(family, net, skb, portid, group, flags, NULL, NULL); } /** * genlmsg_multicast - multicast a netlink message to the default netns * @family: the generic netlink family * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: offset of multicast group in groups array * @flags: allocation flags */ static inline int genlmsg_multicast(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { return genlmsg_multicast_netns(family, &init_net, skb, portid, group, flags); } /** * genlmsg_multicast_allns - multicast a netlink message to all net namespaces * @family: the generic netlink family * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: offset of multicast group in groups array * * This function must hold the RTNL or rcu_read_lock(). 
 */
int genlmsg_multicast_allns(const struct genl_family *family,
			    struct sk_buff *skb, u32 portid,
			    unsigned int group);

/**
 * genlmsg_unicast - unicast a netlink message
 * @net: network namespace to look up @portid in
 * @skb: netlink message as socket buffer
 * @portid: netlink portid of the destination socket
 */
static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb,
				  u32 portid)
{
	return nlmsg_unicast(net->genl_sock, skb, portid);
}

/**
 * genlmsg_reply - reply to a request
 * @skb: netlink message to be sent back
 * @info: receiver information
 */
static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info)
{
	return genlmsg_unicast(genl_info_net(info), skb, info->snd_portid);
}

/**
 * genlmsg_data - head of message payload
 * @gnlh: genetlink message header
 */
static inline void *genlmsg_data(const struct genlmsghdr *gnlh)
{
	return ((unsigned char *) gnlh + GENL_HDRLEN);
}

/**
 * genlmsg_len - length of message payload
 * @gnlh: genetlink message header
 */
static inline int genlmsg_len(const struct genlmsghdr *gnlh)
{
	struct nlmsghdr *nlh = (struct nlmsghdr *)((unsigned char *)gnlh -
						   NLMSG_HDRLEN);
	return (nlh->nlmsg_len - GENL_HDRLEN - NLMSG_HDRLEN);
}

/**
 * genlmsg_msg_size - length of genetlink message not including padding
 * @payload: length of message payload
 */
static inline int genlmsg_msg_size(int payload)
{
	return GENL_HDRLEN + payload;
}

/**
 * genlmsg_total_size - length of genetlink message including padding
 * @payload: length of message payload
 */
static inline int genlmsg_total_size(int payload)
{
	return NLMSG_ALIGN(genlmsg_msg_size(payload));
}

/**
 * genlmsg_new - Allocate a new generic netlink message
 * @payload: size of the message payload
 * @flags: the type of memory to allocate.
 */
static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags)
{
	return nlmsg_new(genlmsg_total_size(payload), flags);
}

/**
 * genl_set_err - report error to genetlink broadcast listeners
 * @family: the generic netlink family
 * @net: the network namespace to report the error to
 * @portid: the PORTID of a process that we want to skip (if any)
 * @group: the broadcast group that will notice the error
 *	(this is the offset of the multicast group in the groups array)
 * @code: error code, must be negative (as usual in kernelspace)
 *
 * This function returns the number of broadcast listeners that have set the
 * NETLINK_RECV_NO_ENOBUFS socket option.
 */
static inline int genl_set_err(const struct genl_family *family,
			       struct net *net, u32 portid,
			       u32 group, int code)
{
	if (WARN_ON_ONCE(group >= family->n_mcgrps))
		return -EINVAL;
	group = family->mcgrp_offset + group;
	return netlink_set_err(net->genl_sock, portid, group, code);
}

static inline int genl_has_listeners(const struct genl_family *family,
				     struct net *net, unsigned int group)
{
	if (WARN_ON_ONCE(group >= family->n_mcgrps))
		return -EINVAL;
	group = family->mcgrp_offset + group;
	return netlink_has_listeners(net->genl_sock, group);
}

#endif /* __NET_GENERIC_NETLINK_H */
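The pieces declared in this header compose in a predictable way. Below is a minimal sketch of a family definition with one doit handler, under the assumption of a hypothetical family named "demo" with a single string attribute and an echo command; none of the DEMO_* names come from the header itself:

#include <linux/module.h>
#include <net/genetlink.h>

/* Hypothetical attribute and command numbering for the "demo" family */
enum {
	DEMO_ATTR_UNSPEC,
	DEMO_ATTR_MSG,			/* NLA_NUL_STRING */
	__DEMO_ATTR_MAX,
};
#define DEMO_ATTR_MAX (__DEMO_ATTR_MAX - 1)

enum {
	DEMO_CMD_UNSPEC,
	DEMO_CMD_ECHO,
	__DEMO_CMD_MAX,
};

static const struct nla_policy demo_policy[DEMO_ATTR_MAX + 1] = {
	[DEMO_ATTR_MSG] = { .type = NLA_NUL_STRING },
};

/* doit handler: echo DEMO_ATTR_MSG back to the requester */
static int demo_echo_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *msg;
	void *hdr;
	int err;

	/* reports the missing attribute through extack */
	if (GENL_REQ_ATTR_CHECK(info, DEMO_ATTR_MSG))
		return -EINVAL;

	msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	/* genlmsg_iput() takes portid/seq/cmd from @info */
	hdr = genlmsg_iput(msg, info);
	if (!hdr) {
		err = -EMSGSIZE;
		goto free;
	}
	err = nla_put_string(msg, DEMO_ATTR_MSG,
			     nla_data(info->attrs[DEMO_ATTR_MSG]));
	if (err)
		goto free;

	genlmsg_end(msg, hdr);
	return genlmsg_reply(msg, info);	/* unicast back to the sender */
free:
	nlmsg_free(msg);
	return err;
}

static const struct genl_ops demo_ops[] = {
	{
		.cmd	= DEMO_CMD_ECHO,
		.doit	= demo_echo_doit,
	},
};

static struct genl_family demo_family = {
	.name		= "demo",
	.version	= 1,
	.maxattr	= DEMO_ATTR_MAX,
	.policy		= demo_policy,
	.module		= THIS_MODULE,
	.ops		= demo_ops,
	.n_ops		= ARRAY_SIZE(demo_ops),
	.resv_start_op	= __DEMO_CMD_MAX,
};

/* registered from module init with genl_register_family(&demo_family) */

Using genlmsg_iput() keeps the reply's portid, sequence number and command consistent with the request, which is what lets the same construction code serve both replies and, via genl_info_init_ntf(), notifications.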
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/fs/pnode.h
 *
 * (C) Copyright IBM Corporation 2005.
 */
#ifndef _LINUX_PNODE_H
#define _LINUX_PNODE_H

#include <linux/list.h>
#include "mount.h"

#define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED)
#define IS_MNT_SLAVE(m) ((m)->mnt_master)
#define IS_MNT_NEW(m) (!(m)->mnt_ns)
#define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
#define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
#define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED)

#define CL_EXPIRE		0x01
#define CL_SLAVE		0x02
#define CL_COPY_UNBINDABLE	0x04
#define CL_MAKE_SHARED		0x08
#define CL_PRIVATE		0x10
#define CL_SHARED_TO_SLAVE	0x20
#define CL_COPY_MNT_NS_FILE	0x40

#define CL_COPY_ALL		(CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE)

static inline void set_mnt_shared(struct mount *mnt)
{
	mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK;
	mnt->mnt.mnt_flags |= MNT_SHARED;
}

void change_mnt_propagation(struct mount *, int);
int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
		  struct hlist_head *);
int propagate_umount(struct list_head *);
int propagate_mount_busy(struct mount *, int);
void propagate_mount_unlock(struct mount *);
void mnt_release_group_id(struct mount *);
int get_dominating_id(struct mount *mnt, const struct path *root);
int mnt_get_count(struct mount *mnt);
void mnt_set_mountpoint(struct mount *, struct mountpoint *, struct mount *);
void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
			   struct mount *mnt);
struct mount *copy_tree(struct mount *, struct dentry *, int);
bool is_path_reachable(struct mount *, struct dentry *,
		       const struct path *root);
int count_mounts(struct mnt_namespace *ns, struct mount *mnt);
bool propagation_would_overmount(const struct mount *from,
				 const struct mount *to,
				 const struct mountpoint *mp);
#endif /* _LINUX_PNODE_H */
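The macros above are plain bit tests and updates on mnt_flags. As a rough userspace illustration of the set/test/clear pattern the propagation code builds on (mock struct layout and mock flag values, not the kernel's actual MNT_* constants):

#include <stdio.h>

/* Mock flag values; the real MNT_* constants live in <linux/mount.h> */
#define MNT_SHARED	0x01
#define MNT_MARKED	0x02

struct vfsmount { int mnt_flags; };
struct mount { struct vfsmount mnt; };

#define IS_MNT_SHARED(m)  ((m)->mnt.mnt_flags & MNT_SHARED)
#define IS_MNT_MARKED(m)  ((m)->mnt.mnt_flags & MNT_MARKED)
#define SET_MNT_MARK(m)   ((m)->mnt.mnt_flags |= MNT_MARKED)
#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)

int main(void)
{
	struct mount m = { .mnt = { .mnt_flags = MNT_SHARED } };

	/* a propagation pass tags a mount with the mark bit... */
	SET_MNT_MARK(&m);
	printf("shared=%d marked=%d\n",
	       !!IS_MNT_SHARED(&m), !!IS_MNT_MARKED(&m));

	/* ...and clears it again once the walk over the tree is done */
	CLEAR_MNT_MARK(&m);
	printf("marked=%d\n", !!IS_MNT_MARKED(&m));
	return 0;
}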
// SPDX-License-Identifier: GPL-2.0-or-later
/* DataCenter TCP (DCTCP) congestion control.
 *
 * http://simula.stanford.edu/~alizade/Site/DCTCP.html
 *
 * This is an implementation of DCTCP over Reno, an enhancement to the
 * TCP congestion control algorithm designed for data centers. DCTCP
 * leverages Explicit Congestion Notification (ECN) in the network to
 * provide multi-bit feedback to the end hosts. DCTCP's goal is to meet
 * the following three data center transport requirements:
 *
 *  - High burst tolerance (incast due to partition/aggregate)
 *  - Low latency (short flows, queries)
 *  - High throughput (continuous data updates, large file transfers)
 *    with commodity shallow buffered switches
 *
 * The algorithm is described in detail in the following two papers:
 *
 * 1) Mohammad Alizadeh, Albert Greenberg, David A. Maltz, Jitendra Padhye,
 *    Parveen Patel, Balaji Prabhakar, Sudipta Sengupta, and Murari Sridharan:
 *      "Data Center TCP (DCTCP)", Data Center Networks session
 *      Proc. ACM SIGCOMM, New Delhi, 2010.
 *    http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
 *
 * 2) Mohammad Alizadeh, Adel Javanmard, and Balaji Prabhakar:
 *      "Analysis of DCTCP: Stability, Convergence, and Fairness"
 *      Proc. ACM SIGMETRICS, San Jose, 2011.
 *    http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp_analysis-full.pdf
 *
 * Initial prototype from Abdul Kabbani, Masato Yasuda and Mohammad Alizadeh.
* * Authors: * * Daniel Borkmann <dborkman@redhat.com> * Florian Westphal <fw@strlen.de> * Glenn Judd <glenn.judd@morganstanley.com> */ #include <linux/btf.h> #include <linux/btf_ids.h> #include <linux/module.h> #include <linux/mm.h> #include <net/tcp.h> #include <linux/inet_diag.h> #include "tcp_dctcp.h" #define DCTCP_MAX_ALPHA 1024U struct dctcp { u32 old_delivered; u32 old_delivered_ce; u32 prior_rcv_nxt; u32 dctcp_alpha; u32 next_seq; u32 ce_state; u32 loss_cwnd; struct tcp_plb_state plb; }; static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ static int dctcp_shift_g_set(const char *val, const struct kernel_param *kp) { return param_set_uint_minmax(val, kp, 0, 10); } static const struct kernel_param_ops dctcp_shift_g_ops = { .set = dctcp_shift_g_set, .get = param_get_uint, }; module_param_cb(dctcp_shift_g, &dctcp_shift_g_ops, &dctcp_shift_g, 0644); MODULE_PARM_DESC(dctcp_shift_g, "parameter g for updating dctcp_alpha"); static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA; module_param(dctcp_alpha_on_init, uint, 0644); MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value"); static struct tcp_congestion_ops dctcp_reno; static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca) { ca->next_seq = tp->snd_nxt; ca->old_delivered = tp->delivered; ca->old_delivered_ce = tp->delivered_ce; } __bpf_kfunc static void dctcp_init(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); if (tcp_ecn_mode_any(tp) || (sk->sk_state == TCP_LISTEN || sk->sk_state == TCP_CLOSE)) { struct dctcp *ca = inet_csk_ca(sk); ca->prior_rcv_nxt = tp->rcv_nxt; ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); ca->loss_cwnd = 0; ca->ce_state = 0; dctcp_reset(tp, ca); tcp_plb_init(sk, &ca->plb); return; } /* No ECN support? Fall back to Reno. Also need to clear * ECT from sk since it is set during 3WHS for DCTCP. */ inet_csk(sk)->icsk_ca_ops = &dctcp_reno; INET_ECN_dontxmit(sk); } __bpf_kfunc static u32 dctcp_ssthresh(struct sock *sk) { struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); ca->loss_cwnd = tcp_snd_cwnd(tp); return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * ca->dctcp_alpha) >> 11U), 2U); } __bpf_kfunc static void dctcp_update_alpha(struct sock *sk, u32 flags) { const struct tcp_sock *tp = tcp_sk(sk); struct dctcp *ca = inet_csk_ca(sk); /* Expired RTT */ if (!before(tp->snd_una, ca->next_seq)) { u32 delivered = tp->delivered - ca->old_delivered; u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; u32 alpha = ca->dctcp_alpha; u32 ce_ratio = 0; if (delivered > 0) { /* dctcp_alpha keeps EWMA of fraction of ECN marked * packets. Because of EWMA smoothing, PLB reaction can * be slow so we use ce_ratio which is an instantaneous * measure of congestion. ce_ratio is the fraction of * ECN marked packets in the previous RTT. */ if (delivered_ce > 0) ce_ratio = (delivered_ce << TCP_PLB_SCALE) / delivered; tcp_plb_update_state(sk, &ca->plb, (int)ce_ratio); tcp_plb_check_rehash(sk, &ca->plb); } /* alpha = (1 - g) * alpha + g * F */ alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); if (delivered_ce) { /* If dctcp_shift_g == 1, a 32bit value would overflow * after 8 M packets. */ delivered_ce <<= (10 - dctcp_shift_g); delivered_ce /= max(1U, delivered); alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA); } /* dctcp_alpha can be read from dctcp_get_info() without * synchro, so we ask compiler to not use dctcp_alpha * as a temporary variable in prior operations. 
*/ WRITE_ONCE(ca->dctcp_alpha, alpha); dctcp_reset(tp, ca); } } static void dctcp_react_to_loss(struct sock *sk) { struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); ca->loss_cwnd = tcp_snd_cwnd(tp); tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U); } __bpf_kfunc static void dctcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Recovery && new_state != inet_csk(sk)->icsk_ca_state) dctcp_react_to_loss(sk); /* We handle RTO in dctcp_cwnd_event to ensure that we perform only * one loss-adjustment per RTT. */ } __bpf_kfunc static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) { struct dctcp *ca = inet_csk_ca(sk); switch (ev) { case CA_EVENT_ECN_IS_CE: case CA_EVENT_ECN_NO_CE: dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); break; case CA_EVENT_LOSS: tcp_plb_update_state_upon_rto(sk, &ca->plb); dctcp_react_to_loss(sk); break; case CA_EVENT_TX_START: tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */ break; default: /* Don't care for the rest. */ break; } } static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { const struct dctcp *ca = inet_csk_ca(sk); const struct tcp_sock *tp = tcp_sk(sk); /* Fill it also in case of VEGASINFO due to req struct limits. * We can still correctly retrieve it later. */ if (ext & (1 << (INET_DIAG_DCTCPINFO - 1)) || ext & (1 << (INET_DIAG_VEGASINFO - 1))) { memset(&info->dctcp, 0, sizeof(info->dctcp)); if (inet_csk(sk)->icsk_ca_ops != &dctcp_reno) { info->dctcp.dctcp_enabled = 1; info->dctcp.dctcp_ce_state = (u16) ca->ce_state; info->dctcp.dctcp_alpha = ca->dctcp_alpha; info->dctcp.dctcp_ab_ecn = tp->mss_cache * (tp->delivered_ce - ca->old_delivered_ce); info->dctcp.dctcp_ab_tot = tp->mss_cache * (tp->delivered - ca->old_delivered); } *attr = INET_DIAG_DCTCPINFO; return sizeof(info->dctcp); } return 0; } __bpf_kfunc static u32 dctcp_cwnd_undo(struct sock *sk) { const struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); return max(tcp_snd_cwnd(tp), ca->loss_cwnd); } static struct tcp_congestion_ops dctcp __read_mostly = { .init = dctcp_init, .in_ack_event = dctcp_update_alpha, .cwnd_event = dctcp_cwnd_event, .ssthresh = dctcp_ssthresh, .cong_avoid = tcp_reno_cong_avoid, .undo_cwnd = dctcp_cwnd_undo, .set_state = dctcp_state, .get_info = dctcp_get_info, .flags = TCP_CONG_NEEDS_ECN, .owner = THIS_MODULE, .name = "dctcp", }; static struct tcp_congestion_ops dctcp_reno __read_mostly = { .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, .undo_cwnd = tcp_reno_undo_cwnd, .get_info = dctcp_get_info, .owner = THIS_MODULE, .name = "dctcp-reno", }; BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids) BTF_ID_FLAGS(func, dctcp_init) BTF_ID_FLAGS(func, dctcp_update_alpha) BTF_ID_FLAGS(func, dctcp_cwnd_event) BTF_ID_FLAGS(func, dctcp_ssthresh) BTF_ID_FLAGS(func, dctcp_cwnd_undo) BTF_ID_FLAGS(func, dctcp_state) BTF_KFUNCS_END(tcp_dctcp_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = { .owner = THIS_MODULE, .set = &tcp_dctcp_check_kfunc_ids, }; static int __init dctcp_register(void) { int ret; BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE); ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set); if (ret < 0) return ret; return tcp_register_congestion_control(&dctcp); } static void __exit dctcp_unregister(void) { tcp_unregister_congestion_control(&dctcp); } module_init(dctcp_register); module_exit(dctcp_unregister); MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); 
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); MODULE_AUTHOR("Glenn Judd <glenn.judd@morganstanley.com>"); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("DataCenter TCP (DCTCP)"); |
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 */

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/usb.h>
#include <linux/usb/audio.h>
#include <linux/usb/audio-v2.h>
#include <linux/usb/audio-v3.h>

#include <sound/core.h>
#include <sound/pcm.h>
#include <sound/control.h>
#include <sound/tlv.h>

#include "usbaudio.h"
#include "card.h"
#include "proc.h"
#include "quirks.h"
#include "endpoint.h"
#include "pcm.h"
#include "helper.h"
#include "format.h"
#include "clock.h"
#include "stream.h"
#include "power.h"
#include "media.h"

static void audioformat_free(struct audioformat *fp)
{
	list_del(&fp->list); /* unlink for avoiding double-free */
	kfree(fp->rate_table);
	kfree(fp->chmap);
	kfree(fp);
}

/*
 * free a substream
 */
static void free_substream(struct snd_usb_substream *subs)
{
	struct audioformat *fp, *n;

	if (!subs->num_formats)
		return; /* not initialized */
	list_for_each_entry_safe(fp, n, &subs->fmt_list, list)
		audioformat_free(fp);
	kfree(subs->str_pd);
	snd_media_stream_delete(subs);
}

/*
 * free a usb stream instance
 */
static void snd_usb_audio_stream_free(struct snd_usb_stream *stream)
{
	free_substream(&stream->substream[0]);
	free_substream(&stream->substream[1]);
	list_del(&stream->list);
	kfree(stream);
}

static void snd_usb_audio_pcm_free(struct snd_pcm *pcm)
{
	struct snd_usb_stream *stream = pcm->private_data;

	if (stream) {
		stream->pcm = NULL;
		snd_usb_audio_stream_free(stream);
	}
}

/*
 * initialize the substream instance.
*/ static void snd_usb_init_substream(struct snd_usb_stream *as, int stream, struct audioformat *fp, struct snd_usb_power_domain *pd) { struct snd_usb_substream *subs = &as->substream[stream]; INIT_LIST_HEAD(&subs->fmt_list); spin_lock_init(&subs->lock); subs->stream = as; subs->direction = stream; subs->dev = as->chip->dev; subs->txfr_quirk = !!(as->chip->quirk_flags & QUIRK_FLAG_ALIGN_TRANSFER); subs->tx_length_quirk = !!(as->chip->quirk_flags & QUIRK_FLAG_TX_LENGTH); subs->speed = snd_usb_get_speed(subs->dev); subs->pkt_offset_adj = 0; subs->stream_offset_adj = 0; snd_usb_set_pcm_ops(as->pcm, stream); list_add_tail(&fp->list, &subs->fmt_list); subs->formats |= fp->formats; subs->num_formats++; subs->fmt_type = fp->fmt_type; subs->ep_num = fp->endpoint; if (fp->channels > subs->channels_max) subs->channels_max = fp->channels; if (pd) { subs->str_pd = pd; /* Initialize Power Domain to idle status D1 */ snd_usb_power_domain_set(subs->stream->chip, pd, UAC3_PD_STATE_D1); } snd_usb_preallocate_buffer(subs); } /* kctl callbacks for usb-audio channel maps */ static int usb_chmap_ctl_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { struct snd_pcm_chmap *info = snd_kcontrol_chip(kcontrol); struct snd_usb_substream *subs = info->private_data; uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; uinfo->count = subs->channels_max; uinfo->value.integer.min = 0; uinfo->value.integer.max = SNDRV_CHMAP_LAST; return 0; } /* check whether a duplicated entry exists in the audiofmt list */ static bool have_dup_chmap(struct snd_usb_substream *subs, struct audioformat *fp) { struct audioformat *prev = fp; list_for_each_entry_continue_reverse(prev, &subs->fmt_list, list) { if (prev->chmap && !memcmp(prev->chmap, fp->chmap, sizeof(*fp->chmap))) return true; } return false; } static int usb_chmap_ctl_tlv(struct snd_kcontrol *kcontrol, int op_flag, unsigned int size, unsigned int __user *tlv) { struct snd_pcm_chmap *info = snd_kcontrol_chip(kcontrol); struct snd_usb_substream *subs = info->private_data; struct audioformat *fp; unsigned int __user *dst; int count = 0; if (size < 8) return -ENOMEM; if (put_user(SNDRV_CTL_TLVT_CONTAINER, tlv)) return -EFAULT; size -= 8; dst = tlv + 2; list_for_each_entry(fp, &subs->fmt_list, list) { int i, ch_bytes; if (!fp->chmap) continue; if (have_dup_chmap(subs, fp)) continue; /* copy the entry */ ch_bytes = fp->chmap->channels * 4; if (size < 8 + ch_bytes) return -ENOMEM; if (put_user(SNDRV_CTL_TLVT_CHMAP_FIXED, dst) || put_user(ch_bytes, dst + 1)) return -EFAULT; dst += 2; for (i = 0; i < fp->chmap->channels; i++, dst++) { if (put_user(fp->chmap->map[i], dst)) return -EFAULT; } count += 8 + ch_bytes; size -= 8 + ch_bytes; } if (put_user(count, tlv + 1)) return -EFAULT; return 0; } static int usb_chmap_ctl_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_pcm_chmap *info = snd_kcontrol_chip(kcontrol); struct snd_usb_substream *subs = info->private_data; struct snd_pcm_chmap_elem *chmap = NULL; int i = 0; if (subs->cur_audiofmt) chmap = subs->cur_audiofmt->chmap; if (chmap) { for (i = 0; i < chmap->channels; i++) ucontrol->value.integer.value[i] = chmap->map[i]; } for (; i < subs->channels_max; i++) ucontrol->value.integer.value[i] = 0; return 0; } /* create a chmap kctl assigned to the given USB substream */ static int add_chmap(struct snd_pcm *pcm, int stream, struct snd_usb_substream *subs) { struct audioformat *fp; struct snd_pcm_chmap *chmap; struct snd_kcontrol *kctl; int err; list_for_each_entry(fp, &subs->fmt_list, list) if 
(fp->chmap) goto ok; /* no chmap is found */ return 0; ok: err = snd_pcm_add_chmap_ctls(pcm, stream, NULL, 0, 0, &chmap); if (err < 0) return err; /* override handlers */ chmap->private_data = subs; kctl = chmap->kctl; kctl->info = usb_chmap_ctl_info; kctl->get = usb_chmap_ctl_get; kctl->tlv.c = usb_chmap_ctl_tlv; return 0; } /* convert from USB ChannelConfig bits to ALSA chmap element */ static struct snd_pcm_chmap_elem *convert_chmap(int channels, unsigned int bits, int protocol) { static const unsigned int uac1_maps[] = { SNDRV_CHMAP_FL, /* left front */ SNDRV_CHMAP_FR, /* right front */ SNDRV_CHMAP_FC, /* center front */ SNDRV_CHMAP_LFE, /* LFE */ SNDRV_CHMAP_RL, /* left surround */ SNDRV_CHMAP_RR, /* right surround */ SNDRV_CHMAP_FLC, /* left of center */ SNDRV_CHMAP_FRC, /* right of center */ SNDRV_CHMAP_RC, /* surround */ SNDRV_CHMAP_SL, /* side left */ SNDRV_CHMAP_SR, /* side right */ SNDRV_CHMAP_TC, /* top */ 0 /* terminator */ }; static const unsigned int uac2_maps[] = { SNDRV_CHMAP_FL, /* front left */ SNDRV_CHMAP_FR, /* front right */ SNDRV_CHMAP_FC, /* front center */ SNDRV_CHMAP_LFE, /* LFE */ SNDRV_CHMAP_RL, /* back left */ SNDRV_CHMAP_RR, /* back right */ SNDRV_CHMAP_FLC, /* front left of center */ SNDRV_CHMAP_FRC, /* front right of center */ SNDRV_CHMAP_RC, /* back center */ SNDRV_CHMAP_SL, /* side left */ SNDRV_CHMAP_SR, /* side right */ SNDRV_CHMAP_TC, /* top center */ SNDRV_CHMAP_TFL, /* top front left */ SNDRV_CHMAP_TFC, /* top front center */ SNDRV_CHMAP_TFR, /* top front right */ SNDRV_CHMAP_TRL, /* top back left */ SNDRV_CHMAP_TRC, /* top back center */ SNDRV_CHMAP_TRR, /* top back right */ SNDRV_CHMAP_TFLC, /* top front left of center */ SNDRV_CHMAP_TFRC, /* top front right of center */ SNDRV_CHMAP_LLFE, /* left LFE */ SNDRV_CHMAP_RLFE, /* right LFE */ SNDRV_CHMAP_TSL, /* top side left */ SNDRV_CHMAP_TSR, /* top side right */ SNDRV_CHMAP_BC, /* bottom center */ SNDRV_CHMAP_RLC, /* back left of center */ SNDRV_CHMAP_RRC, /* back right of center */ 0 /* terminator */ }; struct snd_pcm_chmap_elem *chmap; const unsigned int *maps; int c; if (channels > ARRAY_SIZE(chmap->map)) return NULL; chmap = kzalloc(sizeof(*chmap), GFP_KERNEL); if (!chmap) return NULL; maps = protocol == UAC_VERSION_2 ? 
uac2_maps : uac1_maps; chmap->channels = channels; c = 0; if (bits) { for (; bits && *maps; maps++, bits >>= 1) { if (bits & 1) chmap->map[c++] = *maps; if (c == chmap->channels) break; } } else { /* If we're missing wChannelConfig, then guess something to make sure the channel map is not skipped entirely */ if (channels == 1) chmap->map[c++] = SNDRV_CHMAP_MONO; else for (; c < channels && *maps; maps++) chmap->map[c++] = *maps; } for (; c < channels; c++) chmap->map[c] = SNDRV_CHMAP_UNKNOWN; return chmap; } /* UAC3 device stores channels information in Cluster Descriptors */ static struct snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor *cluster) { unsigned int channels = cluster->bNrChannels; struct snd_pcm_chmap_elem *chmap; void *p = cluster; int len, c; if (channels > ARRAY_SIZE(chmap->map)) return NULL; chmap = kzalloc(sizeof(*chmap), GFP_KERNEL); if (!chmap) return NULL; len = le16_to_cpu(cluster->wLength); c = 0; p += sizeof(struct uac3_cluster_header_descriptor); while (((p - (void *)cluster) < len) && (c < channels)) { struct uac3_cluster_segment_descriptor *cs_desc = p; u16 cs_len; u8 cs_type; cs_len = le16_to_cpu(cs_desc->wLength); cs_type = cs_desc->bSegmentType; if (cs_type == UAC3_CHANNEL_INFORMATION) { struct uac3_cluster_information_segment_descriptor *is = p; unsigned char map; /* * TODO: this conversion is not complete, update it * after adding UAC3 values to asound.h */ switch (is->bChRelationship) { case UAC3_CH_MONO: map = SNDRV_CHMAP_MONO; break; case UAC3_CH_LEFT: case UAC3_CH_FRONT_LEFT: case UAC3_CH_HEADPHONE_LEFT: map = SNDRV_CHMAP_FL; break; case UAC3_CH_RIGHT: case UAC3_CH_FRONT_RIGHT: case UAC3_CH_HEADPHONE_RIGHT: map = SNDRV_CHMAP_FR; break; case UAC3_CH_FRONT_CENTER: map = SNDRV_CHMAP_FC; break; case UAC3_CH_FRONT_LEFT_OF_CENTER: map = SNDRV_CHMAP_FLC; break; case UAC3_CH_FRONT_RIGHT_OF_CENTER: map = SNDRV_CHMAP_FRC; break; case UAC3_CH_SIDE_LEFT: map = SNDRV_CHMAP_SL; break; case UAC3_CH_SIDE_RIGHT: map = SNDRV_CHMAP_SR; break; case UAC3_CH_BACK_LEFT: map = SNDRV_CHMAP_RL; break; case UAC3_CH_BACK_RIGHT: map = SNDRV_CHMAP_RR; break; case UAC3_CH_BACK_CENTER: map = SNDRV_CHMAP_RC; break; case UAC3_CH_BACK_LEFT_OF_CENTER: map = SNDRV_CHMAP_RLC; break; case UAC3_CH_BACK_RIGHT_OF_CENTER: map = SNDRV_CHMAP_RRC; break; case UAC3_CH_TOP_CENTER: map = SNDRV_CHMAP_TC; break; case UAC3_CH_TOP_FRONT_LEFT: map = SNDRV_CHMAP_TFL; break; case UAC3_CH_TOP_FRONT_RIGHT: map = SNDRV_CHMAP_TFR; break; case UAC3_CH_TOP_FRONT_CENTER: map = SNDRV_CHMAP_TFC; break; case UAC3_CH_TOP_FRONT_LOC: map = SNDRV_CHMAP_TFLC; break; case UAC3_CH_TOP_FRONT_ROC: map = SNDRV_CHMAP_TFRC; break; case UAC3_CH_TOP_SIDE_LEFT: map = SNDRV_CHMAP_TSL; break; case UAC3_CH_TOP_SIDE_RIGHT: map = SNDRV_CHMAP_TSR; break; case UAC3_CH_TOP_BACK_LEFT: map = SNDRV_CHMAP_TRL; break; case UAC3_CH_TOP_BACK_RIGHT: map = SNDRV_CHMAP_TRR; break; case UAC3_CH_TOP_BACK_CENTER: map = SNDRV_CHMAP_TRC; break; case UAC3_CH_BOTTOM_CENTER: map = SNDRV_CHMAP_BC; break; case UAC3_CH_LOW_FREQUENCY_EFFECTS: map = SNDRV_CHMAP_LFE; break; case UAC3_CH_LFE_LEFT: map = SNDRV_CHMAP_LLFE; break; case UAC3_CH_LFE_RIGHT: map = SNDRV_CHMAP_RLFE; break; case UAC3_CH_RELATIONSHIP_UNDEFINED: default: map = SNDRV_CHMAP_UNKNOWN; break; } chmap->map[c++] = map; } p += cs_len; } if (channels < c) pr_err("%s: channel number mismatch\n", __func__); chmap->channels = channels; for (; c < channels; c++) chmap->map[c] = SNDRV_CHMAP_UNKNOWN; return chmap; } /* * add this endpoint to the chip instance. 
* if a stream with the same endpoint already exists, append to it. * if not, create a new pcm stream. note, fp is added to the substream * fmt_list and will be freed on the chip instance release. do not free * fp or do remove it from the substream fmt_list to avoid double-free. */ static int __snd_usb_add_audio_stream(struct snd_usb_audio *chip, int stream, struct audioformat *fp, struct snd_usb_power_domain *pd) { struct snd_usb_stream *as; struct snd_usb_substream *subs; struct snd_pcm *pcm; int err; list_for_each_entry(as, &chip->pcm_list, list) { if (as->fmt_type != fp->fmt_type) continue; subs = &as->substream[stream]; if (subs->ep_num == fp->endpoint) { list_add_tail(&fp->list, &subs->fmt_list); subs->num_formats++; subs->formats |= fp->formats; return 0; } } if (chip->card->registered) chip->need_delayed_register = true; /* look for an empty stream */ list_for_each_entry(as, &chip->pcm_list, list) { if (as->fmt_type != fp->fmt_type) continue; subs = &as->substream[stream]; if (subs->ep_num) continue; err = snd_pcm_new_stream(as->pcm, stream, 1); if (err < 0) return err; snd_usb_init_substream(as, stream, fp, pd); return add_chmap(as->pcm, stream, subs); } /* create a new pcm */ as = kzalloc(sizeof(*as), GFP_KERNEL); if (!as) return -ENOMEM; as->pcm_index = chip->pcm_devs; as->chip = chip; as->fmt_type = fp->fmt_type; err = snd_pcm_new(chip->card, "USB Audio", chip->pcm_devs, stream == SNDRV_PCM_STREAM_PLAYBACK ? 1 : 0, stream == SNDRV_PCM_STREAM_PLAYBACK ? 0 : 1, &pcm); if (err < 0) { kfree(as); return err; } as->pcm = pcm; pcm->private_data = as; pcm->private_free = snd_usb_audio_pcm_free; pcm->info_flags = 0; if (chip->pcm_devs > 0) sprintf(pcm->name, "USB Audio #%d", chip->pcm_devs); else strcpy(pcm->name, "USB Audio"); snd_usb_init_substream(as, stream, fp, pd); /* * Keep using head insertion for M-Audio Audiophile USB (tm) which has a * fix to swap capture stream order in conf/cards/USB-audio.conf */ if (chip->usb_id == USB_ID(0x0763, 0x2003)) list_add(&as->list, &chip->pcm_list); else list_add_tail(&as->list, &chip->pcm_list); chip->pcm_devs++; snd_usb_proc_pcm_format_add(as); return add_chmap(pcm, stream, &as->substream[stream]); } int snd_usb_add_audio_stream(struct snd_usb_audio *chip, int stream, struct audioformat *fp) { return __snd_usb_add_audio_stream(chip, stream, fp, NULL); } static int snd_usb_add_audio_stream_v3(struct snd_usb_audio *chip, int stream, struct audioformat *fp, struct snd_usb_power_domain *pd) { return __snd_usb_add_audio_stream(chip, stream, fp, pd); } static int parse_uac_endpoint_attributes(struct snd_usb_audio *chip, struct usb_host_interface *alts, int protocol, int iface_no) { /* parsed with a v1 header here. that's ok as we only look at the * header first which is the same for both versions */ struct uac_iso_endpoint_descriptor *csep; struct usb_interface_descriptor *altsd = get_iface_desc(alts); int attributes = 0; csep = snd_usb_find_desc(alts->endpoint[0].extra, alts->endpoint[0].extralen, NULL, USB_DT_CS_ENDPOINT); /* Creamware Noah has this descriptor after the 2nd endpoint */ if (!csep && altsd->bNumEndpoints >= 2) csep = snd_usb_find_desc(alts->endpoint[1].extra, alts->endpoint[1].extralen, NULL, USB_DT_CS_ENDPOINT); /* * If we can't locate the USB_DT_CS_ENDPOINT descriptor in the extra * bytes after the first endpoint, go search the entire interface. * Some devices have it directly *before* the standard endpoint. 
*/ if (!csep) csep = snd_usb_find_desc(alts->extra, alts->extralen, NULL, USB_DT_CS_ENDPOINT); if (!csep || csep->bLength < 7 || csep->bDescriptorSubtype != UAC_EP_GENERAL) goto error; if (protocol == UAC_VERSION_1) { attributes = csep->bmAttributes; } else if (protocol == UAC_VERSION_2) { struct uac2_iso_endpoint_descriptor *csep2 = (struct uac2_iso_endpoint_descriptor *) csep; if (csep2->bLength < sizeof(*csep2)) goto error; attributes = csep->bmAttributes & UAC_EP_CS_ATTR_FILL_MAX; /* emulate the endpoint attributes of a v1 device */ if (csep2->bmControls & UAC2_CONTROL_PITCH) attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL; } else { /* UAC_VERSION_3 */ struct uac3_iso_endpoint_descriptor *csep3 = (struct uac3_iso_endpoint_descriptor *) csep; if (csep3->bLength < sizeof(*csep3)) goto error; /* emulate the endpoint attributes of a v1 device */ if (le32_to_cpu(csep3->bmControls) & UAC2_CONTROL_PITCH) attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL; } return attributes; error: usb_audio_warn(chip, "%u:%d : no or invalid class specific endpoint descriptor\n", iface_no, altsd->bAlternateSetting); return 0; } /* find an input terminal descriptor (either UAC1 or UAC2) with the given * terminal id */ static void * snd_usb_find_input_terminal_descriptor(struct usb_host_interface *ctrl_iface, int terminal_id, int protocol) { struct uac2_input_terminal_descriptor *term = NULL; while ((term = snd_usb_find_csint_desc(ctrl_iface->extra, ctrl_iface->extralen, term, UAC_INPUT_TERMINAL))) { if (!snd_usb_validate_audio_desc(term, protocol)) continue; if (term->bTerminalID == terminal_id) return term; } return NULL; } static void * snd_usb_find_output_terminal_descriptor(struct usb_host_interface *ctrl_iface, int terminal_id, int protocol) { /* OK to use with both UAC2 and UAC3 */ struct uac2_output_terminal_descriptor *term = NULL; while ((term = snd_usb_find_csint_desc(ctrl_iface->extra, ctrl_iface->extralen, term, UAC_OUTPUT_TERMINAL))) { if (!snd_usb_validate_audio_desc(term, protocol)) continue; if (term->bTerminalID == terminal_id) return term; } return NULL; } static struct audioformat * audio_format_alloc_init(struct snd_usb_audio *chip, struct usb_host_interface *alts, int protocol, int iface_no, int altset_idx, int altno, int num_channels, int clock) { struct audioformat *fp; fp = kzalloc(sizeof(*fp), GFP_KERNEL); if (!fp) return NULL; fp->iface = iface_no; fp->altsetting = altno; fp->altset_idx = altset_idx; fp->endpoint = get_endpoint(alts, 0)->bEndpointAddress; fp->ep_attr = get_endpoint(alts, 0)->bmAttributes; fp->datainterval = snd_usb_parse_datainterval(chip, alts); fp->protocol = protocol; fp->maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize); fp->channels = num_channels; if (snd_usb_get_speed(chip->dev) == USB_SPEED_HIGH) fp->maxpacksize = (((fp->maxpacksize >> 11) & 3) + 1) * (fp->maxpacksize & 0x7ff); fp->clock = clock; INIT_LIST_HEAD(&fp->list); return fp; } static struct audioformat * snd_usb_get_audioformat_uac12(struct snd_usb_audio *chip, struct usb_host_interface *alts, int protocol, int iface_no, int altset_idx, int altno, int stream, int bm_quirk) { struct usb_device *dev = chip->dev; struct uac_format_type_i_continuous_descriptor *fmt; unsigned int num_channels = 0, chconfig = 0; struct usb_host_interface *ctrl_intf; struct audioformat *fp; int clock = 0; u64 format; ctrl_intf = snd_usb_find_ctrl_interface(chip, iface_no); /* get audio formats */ if (protocol == UAC_VERSION_1) { struct uac1_as_header_descriptor *as = snd_usb_find_csint_desc(alts->extra, alts->extralen, 
NULL, UAC_AS_GENERAL); struct uac_input_terminal_descriptor *iterm; if (!as) { dev_err(&dev->dev, "%u:%d : UAC_AS_GENERAL descriptor not found\n", iface_no, altno); return NULL; } if (as->bLength < sizeof(*as)) { dev_err(&dev->dev, "%u:%d : invalid UAC_AS_GENERAL desc\n", iface_no, altno); return NULL; } format = le16_to_cpu(as->wFormatTag); /* remember the format value */ iterm = snd_usb_find_input_terminal_descriptor(ctrl_intf, as->bTerminalLink, protocol); if (iterm) { num_channels = iterm->bNrChannels; chconfig = le16_to_cpu(iterm->wChannelConfig); } } else { /* UAC_VERSION_2 */ struct uac2_input_terminal_descriptor *input_term; struct uac2_output_terminal_descriptor *output_term; struct uac2_as_header_descriptor *as = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); if (!as) { dev_err(&dev->dev, "%u:%d : UAC_AS_GENERAL descriptor not found\n", iface_no, altno); return NULL; } if (as->bLength < sizeof(*as)) { dev_err(&dev->dev, "%u:%d : invalid UAC_AS_GENERAL desc\n", iface_no, altno); return NULL; } num_channels = as->bNrChannels; format = le32_to_cpu(as->bmFormats); chconfig = le32_to_cpu(as->bmChannelConfig); /* * lookup the terminal associated to this interface * to extract the clock */ input_term = snd_usb_find_input_terminal_descriptor(ctrl_intf, as->bTerminalLink, protocol); if (input_term) { clock = input_term->bCSourceID; if (!chconfig && (num_channels == input_term->bNrChannels)) chconfig = le32_to_cpu(input_term->bmChannelConfig); goto found_clock; } output_term = snd_usb_find_output_terminal_descriptor(ctrl_intf, as->bTerminalLink, protocol); if (output_term) { clock = output_term->bCSourceID; goto found_clock; } dev_err(&dev->dev, "%u:%d : bogus bTerminalLink %d\n", iface_no, altno, as->bTerminalLink); return NULL; } found_clock: /* get format type */ fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_FORMAT_TYPE); if (!fmt) { dev_err(&dev->dev, "%u:%d : no UAC_FORMAT_TYPE desc\n", iface_no, altno); return NULL; } if (((protocol == UAC_VERSION_1) && (fmt->bLength < 8)) || ((protocol == UAC_VERSION_2) && (fmt->bLength < 6))) { dev_err(&dev->dev, "%u:%d : invalid UAC_FORMAT_TYPE desc\n", iface_no, altno); return NULL; } /* * Blue Microphones workaround: The last altsetting is * identical with the previous one, except for a larger * packet size, but is actually a mislabeled two-channel * setting; ignore it. * * Part 2: analyze quirk flag and format */ if (bm_quirk && fmt->bNrChannels == 1 && fmt->bSubframeSize == 2) return NULL; fp = audio_format_alloc_init(chip, alts, protocol, iface_no, altset_idx, altno, num_channels, clock); if (!fp) return ERR_PTR(-ENOMEM); fp->attributes = parse_uac_endpoint_attributes(chip, alts, protocol, iface_no); /* some quirks for attributes here */ snd_usb_audioformat_attributes_quirk(chip, fp, stream); /* ok, let's parse further... 
*/ if (snd_usb_parse_audio_format(chip, fp, format, fmt, stream) < 0) { audioformat_free(fp); return NULL; } /* Create chmap */ if (fp->channels != num_channels) chconfig = 0; fp->chmap = convert_chmap(fp->channels, chconfig, protocol); return fp; } static struct audioformat * snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, struct usb_host_interface *alts, struct snd_usb_power_domain **pd_out, int iface_no, int altset_idx, int altno, int stream) { struct usb_device *dev = chip->dev; struct uac3_input_terminal_descriptor *input_term; struct uac3_output_terminal_descriptor *output_term; struct uac3_cluster_header_descriptor *cluster; struct uac3_as_header_descriptor *as = NULL; struct uac3_hc_descriptor_header hc_header; struct usb_host_interface *ctrl_intf; struct snd_pcm_chmap_elem *chmap; struct snd_usb_power_domain *pd; unsigned char badd_profile; u64 badd_formats = 0; unsigned int num_channels; struct audioformat *fp; u16 cluster_id, wLength; int clock = 0; int err; badd_profile = chip->badd_profile; ctrl_intf = snd_usb_find_ctrl_interface(chip, iface_no); if (badd_profile >= UAC3_FUNCTION_SUBCLASS_GENERIC_IO) { unsigned int maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize); switch (maxpacksize) { default: dev_err(&dev->dev, "%u:%d : incorrect wMaxPacketSize for BADD profile\n", iface_no, altno); return NULL; case UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_16: case UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_16: badd_formats = SNDRV_PCM_FMTBIT_S16_LE; num_channels = 1; break; case UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_24: case UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_24: badd_formats = SNDRV_PCM_FMTBIT_S24_3LE; num_channels = 1; break; case UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_16: case UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_16: badd_formats = SNDRV_PCM_FMTBIT_S16_LE; num_channels = 2; break; case UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_24: case UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_24: badd_formats = SNDRV_PCM_FMTBIT_S24_3LE; num_channels = 2; break; } chmap = kzalloc(sizeof(*chmap), GFP_KERNEL); if (!chmap) return ERR_PTR(-ENOMEM); if (num_channels == 1) { chmap->map[0] = SNDRV_CHMAP_MONO; } else { chmap->map[0] = SNDRV_CHMAP_FL; chmap->map[1] = SNDRV_CHMAP_FR; } chmap->channels = num_channels; clock = UAC3_BADD_CS_ID9; goto found_clock; } as = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); if (!as) { dev_err(&dev->dev, "%u:%d : UAC_AS_GENERAL descriptor not found\n", iface_no, altno); return NULL; } if (as->bLength < sizeof(*as)) { dev_err(&dev->dev, "%u:%d : invalid UAC_AS_GENERAL desc\n", iface_no, altno); return NULL; } cluster_id = le16_to_cpu(as->wClusterDescrID); if (!cluster_id) { dev_err(&dev->dev, "%u:%d : no cluster descriptor\n", iface_no, altno); return NULL; } /* * Get number of channels and channel map through * High Capability Cluster Descriptor * * First step: get High Capability header and * read size of Cluster Descriptor */ err = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), UAC3_CS_REQ_HIGH_CAPABILITY_DESCRIPTOR, USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, cluster_id, snd_usb_ctrl_intf(ctrl_intf), &hc_header, sizeof(hc_header)); if (err < 0) return ERR_PTR(err); else if (err != sizeof(hc_header)) { dev_err(&dev->dev, "%u:%d : can't get High Capability descriptor\n", iface_no, altno); return ERR_PTR(-EIO); } /* * Second step: allocate needed amount of memory * and request Cluster Descriptor */ wLength = le16_to_cpu(hc_header.wLength); cluster = kzalloc(wLength, GFP_KERNEL); if (!cluster) return ERR_PTR(-ENOMEM); err = snd_usb_ctl_msg(chip->dev, 
usb_rcvctrlpipe(chip->dev, 0), UAC3_CS_REQ_HIGH_CAPABILITY_DESCRIPTOR, USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, cluster_id, snd_usb_ctrl_intf(ctrl_intf), cluster, wLength); if (err < 0) { kfree(cluster); return ERR_PTR(err); } else if (err != wLength) { dev_err(&dev->dev, "%u:%d : can't get Cluster Descriptor\n", iface_no, altno); kfree(cluster); return ERR_PTR(-EIO); } num_channels = cluster->bNrChannels; chmap = convert_chmap_v3(cluster); kfree(cluster); /* * lookup the terminal associated to this interface * to extract the clock */ input_term = snd_usb_find_input_terminal_descriptor(ctrl_intf, as->bTerminalLink, UAC_VERSION_3); if (input_term) { clock = input_term->bCSourceID; goto found_clock; } output_term = snd_usb_find_output_terminal_descriptor(ctrl_intf, as->bTerminalLink, UAC_VERSION_3); if (output_term) { clock = output_term->bCSourceID; goto found_clock; } dev_err(&dev->dev, "%u:%d : bogus bTerminalLink %d\n", iface_no, altno, as->bTerminalLink); kfree(chmap); return NULL; found_clock: fp = audio_format_alloc_init(chip, alts, UAC_VERSION_3, iface_no, altset_idx, altno, num_channels, clock); if (!fp) { kfree(chmap); return ERR_PTR(-ENOMEM); } fp->chmap = chmap; if (badd_profile >= UAC3_FUNCTION_SUBCLASS_GENERIC_IO) { fp->attributes = 0; /* No attributes */ fp->fmt_type = UAC_FORMAT_TYPE_I; fp->formats = badd_formats; fp->nr_rates = 0; /* SNDRV_PCM_RATE_CONTINUOUS */ fp->rate_min = UAC3_BADD_SAMPLING_RATE; fp->rate_max = UAC3_BADD_SAMPLING_RATE; fp->rates = SNDRV_PCM_RATE_CONTINUOUS; pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) { audioformat_free(fp); return NULL; } pd->pd_id = (stream == SNDRV_PCM_STREAM_PLAYBACK) ? UAC3_BADD_PD_ID10 : UAC3_BADD_PD_ID11; pd->pd_d1d0_rec = UAC3_BADD_PD_RECOVER_D1D0; pd->pd_d2d0_rec = UAC3_BADD_PD_RECOVER_D2D0; pd->ctrl_iface = ctrl_intf; } else { fp->attributes = parse_uac_endpoint_attributes(chip, alts, UAC_VERSION_3, iface_no); pd = snd_usb_find_power_domain(ctrl_intf, as->bTerminalLink); /* ok, let's parse further... */ if (snd_usb_parse_audio_format_v3(chip, fp, as, stream) < 0) { kfree(pd); audioformat_free(fp); return NULL; } } if (pd) *pd_out = pd; return fp; } static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip, int iface_no, bool *has_non_pcm, bool non_pcm) { struct usb_device *dev; struct usb_interface *iface; struct usb_host_interface *alts; struct usb_interface_descriptor *altsd; int i, altno, err, stream; struct audioformat *fp = NULL; struct snd_usb_power_domain *pd = NULL; bool set_iface_first; int num, protocol; dev = chip->dev; /* parse the interface's altsettings */ iface = usb_ifnum_to_if(dev, iface_no); num = iface->num_altsetting; /* * Dallas DS4201 workaround: It presents 5 altsettings, but the last * one misses syncpipe, and does not produce any sound. 
*/ if (chip->usb_id == USB_ID(0x04fa, 0x4201) && num >= 4) num = 4; for (i = 0; i < num; i++) { alts = &iface->altsetting[i]; altsd = get_iface_desc(alts); protocol = altsd->bInterfaceProtocol; /* skip invalid one */ if (((altsd->bInterfaceClass != USB_CLASS_AUDIO || (altsd->bInterfaceSubClass != USB_SUBCLASS_AUDIOSTREAMING && altsd->bInterfaceSubClass != USB_SUBCLASS_VENDOR_SPEC)) && altsd->bInterfaceClass != USB_CLASS_VENDOR_SPEC) || altsd->bNumEndpoints < 1 || le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize) == 0) continue; /* must be isochronous */ if ((get_endpoint(alts, 0)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) != USB_ENDPOINT_XFER_ISOC) continue; /* check direction */ stream = (get_endpoint(alts, 0)->bEndpointAddress & USB_DIR_IN) ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; altno = altsd->bAlternateSetting; if (snd_usb_apply_interface_quirk(chip, iface_no, altno)) continue; /* * Roland audio streaming interfaces are marked with protocols * 0/1/2, but are UAC 1 compatible. */ if (USB_ID_VENDOR(chip->usb_id) == 0x0582 && altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC && protocol <= 2) protocol = UAC_VERSION_1; switch (protocol) { default: dev_dbg(&dev->dev, "%u:%d: unknown interface protocol %#02x, assuming v1\n", iface_no, altno, protocol); protocol = UAC_VERSION_1; fallthrough; case UAC_VERSION_1: case UAC_VERSION_2: { int bm_quirk = 0; /* * Blue Microphones workaround: The last altsetting is * identical with the previous one, except for a larger * packet size, but is actually a mislabeled two-channel * setting; ignore it. * * Part 1: prepare quirk flag */ if (altno == 2 && num == 3 && fp && fp->altsetting == 1 && fp->channels == 1 && fp->formats == SNDRV_PCM_FMTBIT_S16_LE && protocol == UAC_VERSION_1 && le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize) == fp->maxpacksize * 2) bm_quirk = 1; fp = snd_usb_get_audioformat_uac12(chip, alts, protocol, iface_no, i, altno, stream, bm_quirk); break; } case UAC_VERSION_3: fp = snd_usb_get_audioformat_uac3(chip, alts, &pd, iface_no, i, altno, stream); break; } if (!fp) continue; else if (IS_ERR(fp)) return PTR_ERR(fp); if (fp->fmt_type != UAC_FORMAT_TYPE_I) *has_non_pcm = true; if ((fp->fmt_type == UAC_FORMAT_TYPE_I) == non_pcm) { audioformat_free(fp); kfree(pd); fp = NULL; pd = NULL; continue; } snd_usb_audioformat_set_sync_ep(chip, fp); dev_dbg(&dev->dev, "%u:%d: add audio endpoint %#x\n", iface_no, altno, fp->endpoint); if (protocol == UAC_VERSION_3) err = snd_usb_add_audio_stream_v3(chip, stream, fp, pd); else err = snd_usb_add_audio_stream(chip, stream, fp); if (err < 0) { audioformat_free(fp); kfree(pd); return err; } /* add endpoints */ err = snd_usb_add_endpoint(chip, fp->endpoint, SND_USB_ENDPOINT_TYPE_DATA); if (err < 0) return err; if (fp->sync_ep) { err = snd_usb_add_endpoint(chip, fp->sync_ep, fp->implicit_fb ? SND_USB_ENDPOINT_TYPE_DATA : SND_USB_ENDPOINT_TYPE_SYNC); if (err < 0) return err; } set_iface_first = false; if (protocol == UAC_VERSION_1 || (chip->quirk_flags & QUIRK_FLAG_SET_IFACE_FIRST)) set_iface_first = true; /* try to set the interface... 
		 */
		usb_set_interface(chip->dev, iface_no, 0);
		if (set_iface_first)
			usb_set_interface(chip->dev, iface_no, altno);
		snd_usb_init_pitch(chip, fp);
		snd_usb_init_sample_rate(chip, fp, fp->rate_max);
		if (!set_iface_first)
			usb_set_interface(chip->dev, iface_no, altno);
	}

	return 0;
}

int snd_usb_parse_audio_interface(struct snd_usb_audio *chip, int iface_no)
{
	int err;
	bool has_non_pcm = false;

	/* parse PCM formats */
	err = __snd_usb_parse_audio_interface(chip, iface_no, &has_non_pcm, false);
	if (err < 0)
		return err;

	if (has_non_pcm) {
		/* parse non-PCM formats */
		err = __snd_usb_parse_audio_interface(chip, iface_no, &has_non_pcm, true);
		if (err < 0)
			return err;
	}

	return 0;
}
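/*
 * Illustrative sketch (not part of the kernel sources): decodes a USB 2.0
 * high-speed isochronous wMaxPacketSize the same way audio_format_alloc_init()
 * above does, where bits 12..11 encode additional transactions per microframe
 * and bits 10..0 the per-packet size.  The sample value below is made up.
 */
#include <stdio.h>

static unsigned int hs_bytes_per_microframe(unsigned int wMaxPacketSize)
{
	unsigned int per_packet = wMaxPacketSize & 0x7ff;
	unsigned int packets    = ((wMaxPacketSize >> 11) & 3) + 1;

	return packets * per_packet;
}

int main(void)
{
	/* 0x0c00: one additional transaction (2 packets) of 1024 bytes */
	unsigned int wmps = 0x0c00;

	printf("wMaxPacketSize %#x -> %u bytes per microframe\n",
	       wmps, hs_bytes_per_microframe(wmps));
	return 0;
}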
| 3 3 2 1 1 2 3 3 2 3 2 2 2 2 2 2 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 | // SPDX-License-Identifier: GPL-2.0-only /* * net/sched/sch_mq.c Classful multiqueue dummy scheduler * * Copyright (c) 2009 Patrick McHardy <kaber@trash.net> */ #include <linux/types.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <net/netlink.h> #include <net/pkt_cls.h> #include <net/pkt_sched.h> #include <net/sch_generic.h> struct mq_sched { struct Qdisc **qdiscs; }; static int mq_offload(struct Qdisc *sch, enum tc_mq_command cmd) { struct net_device *dev = qdisc_dev(sch); struct tc_mq_qopt_offload opt = { .command = cmd, .handle = sch->handle, }; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt); } static int mq_offload_stats(struct Qdisc *sch) { struct tc_mq_qopt_offload opt = { .command = TC_MQ_STATS, .handle = sch->handle, .stats = { .bstats = &sch->bstats, .qstats = &sch->qstats, }, }; return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_MQ, &opt); } static void mq_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mq_sched *priv = qdisc_priv(sch); unsigned int ntx; mq_offload(sch, TC_MQ_DESTROY); if (!priv->qdiscs) return; for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) qdisc_put(priv->qdiscs[ntx]); kfree(priv->qdiscs); } static int mq_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct net_device *dev = qdisc_dev(sch); struct mq_sched *priv = qdisc_priv(sch); struct netdev_queue *dev_queue; struct Qdisc *qdisc; unsigned int ntx; if (sch->parent != TC_H_ROOT) return -EOPNOTSUPP; if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; /* pre-allocate qdiscs, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); if (!priv->qdiscs) return -ENOMEM; for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { dev_queue = netdev_get_tx_queue(dev, ntx); qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx), TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(ntx + 1)), extack); if (!qdisc) return -ENOMEM; priv->qdiscs[ntx] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->flags |= TCQ_F_MQROOT; mq_offload(sch, TC_MQ_CREATE); return 0; } static void mq_attach(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mq_sched *priv = qdisc_priv(sch); struct Qdisc *qdisc, *old; unsigned int ntx; for (ntx = 0; ntx 
< dev->num_tx_queues; ntx++) { qdisc = priv->qdiscs[ntx]; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); if (old) qdisc_put(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) qdisc_hash_add(qdisc, false); #endif } kfree(priv->qdiscs); priv->qdiscs = NULL; } static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct net_device *dev = qdisc_dev(sch); struct Qdisc *qdisc; unsigned int ntx; sch->q.qlen = 0; gnet_stats_basic_sync_init(&sch->bstats); memset(&sch->qstats, 0, sizeof(sch->qstats)); /* MQ supports lockless qdiscs. However, statistics accounting needs * to account for all, none, or a mix of locked and unlocked child * qdiscs. Percpu stats are added to counters in-band and locking * qdisc totals are added at end. */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping); spin_lock_bh(qdisc_lock(qdisc)); gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, &qdisc->bstats, false); gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, &qdisc->qstats); sch->q.qlen += qdisc_qlen(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); } return mq_offload_stats(sch); } static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl) { struct net_device *dev = qdisc_dev(sch); unsigned long ntx = cl - 1; if (ntx >= dev->num_tx_queues) return NULL; return netdev_get_tx_queue(dev, ntx); } static struct netdev_queue *mq_select_queue(struct Qdisc *sch, struct tcmsg *tcm) { return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); } static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); struct tc_mq_qopt_offload graft_offload; struct net_device *dev = qdisc_dev(sch); if (dev->flags & IFF_UP) dev_deactivate(dev); *old = dev_graft_qdisc(dev_queue, new); if (new) new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) dev_activate(dev); graft_offload.handle = sch->handle; graft_offload.graft_params.queue = cl - 1; graft_offload.graft_params.child_handle = new ? 
new->handle : 0; graft_offload.command = TC_MQ_GRAFT; qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, *old, TC_SETUP_QDISC_MQ, &graft_offload, extack); return 0; } static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl) { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); return rtnl_dereference(dev_queue->qdisc_sleeping); } static unsigned long mq_find(struct Qdisc *sch, u32 classid) { unsigned int ntx = TC_H_MIN(classid); if (!mq_queue_get(sch, ntx)) return 0; return ntx; } static int mq_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle |= TC_H_MIN(cl); tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; return 0; } static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct gnet_dump *d) { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); sch = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; return 0; } static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct net_device *dev = qdisc_dev(sch); unsigned int ntx; if (arg->stop) return; arg->count = arg->skip; for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) { if (!tc_qdisc_stats_dump(sch, ntx + 1, arg)) break; } } static const struct Qdisc_class_ops mq_class_ops = { .select_queue = mq_select_queue, .graft = mq_graft, .leaf = mq_leaf, .find = mq_find, .walk = mq_walk, .dump = mq_dump_class, .dump_stats = mq_dump_class_stats, }; struct Qdisc_ops mq_qdisc_ops __read_mostly = { .cl_ops = &mq_class_ops, .id = "mq", .priv_size = sizeof(struct mq_sched), .init = mq_init, .destroy = mq_destroy, .attach = mq_attach, .change_real_num_tx = mq_change_real_num_tx, .dump = mq_dump, .owner = THIS_MODULE, }; |
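/*
 * Illustrative sketch (not part of the kernel sources): shows how mq derives
 * per-queue class ids, mirroring TC_H_MAKE(TC_H_MAJ(sch->handle),
 * TC_H_MIN(ntx + 1)) in mq_init() above.  A tc handle packs a 16-bit major
 * number in the upper half and a 16-bit minor number in the lower half, so
 * tx queue ntx becomes class major:(ntx + 1).  The X_TC_H_* macros mirror the
 * kernel's TC_H_* definitions; the root handle value is made up.
 */
#include <stdio.h>

#define X_TC_H_MAJ(h)		((h) & 0xffff0000U)
#define X_TC_H_MIN(h)		((h) & 0x0000ffffU)
#define X_TC_H_MAKE(maj, min)	(X_TC_H_MAJ(maj) | X_TC_H_MIN(min))

int main(void)
{
	unsigned int root = 0x00010000;	/* e.g. "handle 1:" on the root qdisc */
	unsigned int ntx;

	for (ntx = 0; ntx < 4; ntx++) {
		unsigned int classid = X_TC_H_MAKE(X_TC_H_MAJ(root),
						   X_TC_H_MIN(ntx + 1));

		/* mq_queue_get() goes the other way: queue index = minor - 1 */
		printf("tx queue %u -> class %x:%x\n",
		       ntx, classid >> 16, classid & 0xffff);
	}
	return 0;
}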
| 1 1 1 1 2 1 1 1 2 1 3 3 3 3 1 2 1 1 1 1 1 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 | // SPDX-License-Identifier: GPL-2.0-only /* * kernel/ksysfs.c - sysfs attributes in /sys/kernel, which * are not related to any other subsystem * * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org> */ #include <asm/byteorder.h> #include <linux/kobject.h> #include <linux/string.h> #include <linux/sysfs.h> #include <linux/export.h> #include <linux/init.h> #include <linux/kexec.h> #include <linux/profile.h> #include <linux/stat.h> #include <linux/sched.h> #include <linux/capability.h> #include <linux/compiler.h> #include <linux/rcupdate.h> /* rcu_expedited and rcu_normal */ #if defined(__LITTLE_ENDIAN) #define CPU_BYTEORDER_STRING "little" #elif defined(__BIG_ENDIAN) #define CPU_BYTEORDER_STRING "big" #else #error Unknown byteorder #endif #define KERNEL_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) #define KERNEL_ATTR_RW(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RW(_name) /* current uevent sequence number */ static ssize_t uevent_seqnum_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%llu\n", (u64)atomic64_read(&uevent_seqnum)); } KERNEL_ATTR_RO(uevent_seqnum); /* cpu byteorder */ static ssize_t cpu_byteorder_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", CPU_BYTEORDER_STRING); } KERNEL_ATTR_RO(cpu_byteorder); /* address bits */ static ssize_t address_bits_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%zu\n", sizeof(void *) * 8 /* CHAR_BIT */); } KERNEL_ATTR_RO(address_bits); #ifdef CONFIG_UEVENT_HELPER /* uevent helper program, used during early boot */ static ssize_t uevent_helper_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", uevent_helper); } static ssize_t uevent_helper_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { if (count+1 > UEVENT_HELPER_PATH_LEN) return -ENOENT; memcpy(uevent_helper, buf, count); uevent_helper[count] = '\0'; if (count && uevent_helper[count-1] == '\n') uevent_helper[count-1] = '\0'; return count; } KERNEL_ATTR_RW(uevent_helper); #endif #ifdef CONFIG_PROFILING static ssize_t profiling_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", prof_on); } static 
ssize_t profiling_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { int ret; static DEFINE_MUTEX(lock); /* * We need serialization, for profile_setup() initializes prof_on * value and profile_init() must not reallocate prof_buffer after * once allocated. */ guard(mutex)(&lock); if (prof_on) return -EEXIST; /* * This eventually calls into get_option() which * has a ton of callers and is not const. It is * easiest to cast it away here. */ profile_setup((char *)buf); ret = profile_init(); if (ret) return ret; ret = create_proc_profile(); if (ret) return ret; return count; } KERNEL_ATTR_RW(profiling); #endif #ifdef CONFIG_KEXEC_CORE static ssize_t kexec_loaded_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", !!kexec_image); } KERNEL_ATTR_RO(kexec_loaded); #ifdef CONFIG_CRASH_DUMP static ssize_t kexec_crash_loaded_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", kexec_crash_loaded()); } KERNEL_ATTR_RO(kexec_crash_loaded); static ssize_t kexec_crash_size_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { ssize_t size = crash_get_memory_size(); if (size < 0) return size; return sysfs_emit(buf, "%zd\n", size); } static ssize_t kexec_crash_size_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { unsigned long cnt; int ret; if (kstrtoul(buf, 0, &cnt)) return -EINVAL; ret = crash_shrink_memory(cnt); return ret < 0 ? ret : count; } KERNEL_ATTR_RW(kexec_crash_size); #endif /* CONFIG_CRASH_DUMP*/ #endif /* CONFIG_KEXEC_CORE */ #ifdef CONFIG_VMCORE_INFO static ssize_t vmcoreinfo_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { phys_addr_t vmcore_base = paddr_vmcoreinfo_note(); return sysfs_emit(buf, "%pa %x\n", &vmcore_base, (unsigned int)VMCOREINFO_NOTE_SIZE); } KERNEL_ATTR_RO(vmcoreinfo); #ifdef CONFIG_CRASH_HOTPLUG static ssize_t crash_elfcorehdr_size_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { unsigned int sz = crash_get_elfcorehdr_size(); return sysfs_emit(buf, "%u\n", sz); } KERNEL_ATTR_RO(crash_elfcorehdr_size); #endif #endif /* CONFIG_VMCORE_INFO */ /* whether file capabilities are enabled */ static ssize_t fscaps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", file_caps_enabled); } KERNEL_ATTR_RO(fscaps); #ifndef CONFIG_TINY_RCU int rcu_expedited; static ssize_t rcu_expedited_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", READ_ONCE(rcu_expedited)); } static ssize_t rcu_expedited_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { if (kstrtoint(buf, 0, &rcu_expedited)) return -EINVAL; return count; } KERNEL_ATTR_RW(rcu_expedited); int rcu_normal; static ssize_t rcu_normal_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", READ_ONCE(rcu_normal)); } static ssize_t rcu_normal_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { if (kstrtoint(buf, 0, &rcu_normal)) return -EINVAL; return count; } KERNEL_ATTR_RW(rcu_normal); #endif /* #ifndef CONFIG_TINY_RCU */ /* * Make /sys/kernel/notes give the raw contents of our kernel .notes section. 
*/ extern const void __start_notes; extern const void __stop_notes; #define notes_size (&__stop_notes - &__start_notes) static __ro_after_init BIN_ATTR_SIMPLE_RO(notes); struct kobject *kernel_kobj; EXPORT_SYMBOL_GPL(kernel_kobj); static struct attribute * kernel_attrs[] = { &fscaps_attr.attr, &uevent_seqnum_attr.attr, &cpu_byteorder_attr.attr, &address_bits_attr.attr, #ifdef CONFIG_UEVENT_HELPER &uevent_helper_attr.attr, #endif #ifdef CONFIG_PROFILING &profiling_attr.attr, #endif #ifdef CONFIG_KEXEC_CORE &kexec_loaded_attr.attr, #ifdef CONFIG_CRASH_DUMP &kexec_crash_loaded_attr.attr, &kexec_crash_size_attr.attr, #endif #endif #ifdef CONFIG_VMCORE_INFO &vmcoreinfo_attr.attr, #ifdef CONFIG_CRASH_HOTPLUG &crash_elfcorehdr_size_attr.attr, #endif #endif #ifndef CONFIG_TINY_RCU &rcu_expedited_attr.attr, &rcu_normal_attr.attr, #endif NULL }; static const struct attribute_group kernel_attr_group = { .attrs = kernel_attrs, }; static int __init ksysfs_init(void) { int error; kernel_kobj = kobject_create_and_add("kernel", NULL); if (!kernel_kobj) { error = -ENOMEM; goto exit; } error = sysfs_create_group(kernel_kobj, &kernel_attr_group); if (error) goto kset_exit; if (notes_size > 0) { bin_attr_notes.private = (void *)&__start_notes; bin_attr_notes.size = notes_size; error = sysfs_create_bin_file(kernel_kobj, &bin_attr_notes); if (error) goto group_exit; } return 0; group_exit: sysfs_remove_group(kernel_kobj, &kernel_attr_group); kset_exit: kobject_put(kernel_kobj); exit: return error; } core_initcall(ksysfs_init); |
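For reference, the attributes registered above surface as ordinary files under /sys/kernel. Below is a minimal userspace sketch (not part of ksysfs.c) that reads a few of them; it assumes a kernel recent enough to expose address_bits, since older kernels only create the other entries.

/*
 * Read a handful of the /sys/kernel attributes created by ksysfs_init().
 */
#include <stdio.h>

static void show_attr(const char *name)
{
	char path[64], buf[64];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/kernel/%s", name);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%-16s %s", name, buf);	/* sysfs_emit() output already ends in '\n' */
	fclose(f);
}

int main(void)
{
	show_attr("uevent_seqnum");
	show_attr("cpu_byteorder");
	show_attr("address_bits");
	show_attr("fscaps");
	return 0;
}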
811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 | // SPDX-License-Identifier: GPL-2.0-or-later /* Manage a process's keyrings * * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/init.h> #include <linux/sched.h> #include <linux/sched/user.h> #include <linux/keyctl.h> #include <linux/fs.h> #include <linux/err.h> #include <linux/mutex.h> #include <linux/security.h> #include <linux/user_namespace.h> #include <linux/uaccess.h> #include <linux/init_task.h> #include <keys/request_key_auth-type.h> #include "internal.h" /* Session keyring create vs join semaphore */ static DEFINE_MUTEX(key_session_mutex); /* The root user's tracking struct */ struct key_user root_key_user = { .usage = REFCOUNT_INIT(3), .cons_lock = __MUTEX_INITIALIZER(root_key_user.cons_lock), .lock = __SPIN_LOCK_UNLOCKED(root_key_user.lock), .nkeys = ATOMIC_INIT(2), .nikeys = ATOMIC_INIT(2), .uid = GLOBAL_ROOT_UID, }; /* * Get or create a user register keyring. */ static struct key *get_user_register(struct user_namespace *user_ns) { struct key *reg_keyring = READ_ONCE(user_ns->user_keyring_register); if (reg_keyring) return reg_keyring; down_write(&user_ns->keyring_sem); /* Make sure there's a register keyring. It gets owned by the * user_namespace's owner. */ reg_keyring = user_ns->user_keyring_register; if (!reg_keyring) { reg_keyring = keyring_alloc(".user_reg", user_ns->owner, INVALID_GID, &init_cred, KEY_POS_WRITE | KEY_POS_SEARCH | KEY_USR_VIEW | KEY_USR_READ, 0, NULL, NULL); if (!IS_ERR(reg_keyring)) smp_store_release(&user_ns->user_keyring_register, reg_keyring); } up_write(&user_ns->keyring_sem); /* We don't return a ref since the keyring is pinned by the user_ns */ return reg_keyring; } /* * Look up the user and user session keyrings for the current process's UID, * creating them if they don't exist. */ int look_up_user_keyrings(struct key **_user_keyring, struct key **_user_session_keyring) { const struct cred *cred = current_cred(); struct user_namespace *user_ns = current_user_ns(); struct key *reg_keyring, *uid_keyring, *session_keyring; key_perm_t user_keyring_perm; key_ref_t uid_keyring_r, session_keyring_r; uid_t uid = from_kuid(user_ns, cred->user->uid); char buf[20]; int ret; user_keyring_perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL; kenter("%u", uid); reg_keyring = get_user_register(user_ns); if (IS_ERR(reg_keyring)) return PTR_ERR(reg_keyring); down_write(&user_ns->keyring_sem); ret = 0; /* Get the user keyring. Note that there may be one in existence * already as it may have been pinned by a session, but the user_struct * pointing to it may have been destroyed by setuid. 
*/ snprintf(buf, sizeof(buf), "_uid.%u", uid); uid_keyring_r = keyring_search(make_key_ref(reg_keyring, true), &key_type_keyring, buf, false); kdebug("_uid %p", uid_keyring_r); if (uid_keyring_r == ERR_PTR(-EAGAIN)) { uid_keyring = keyring_alloc(buf, cred->user->uid, INVALID_GID, cred, user_keyring_perm, KEY_ALLOC_UID_KEYRING | KEY_ALLOC_IN_QUOTA, NULL, reg_keyring); if (IS_ERR(uid_keyring)) { ret = PTR_ERR(uid_keyring); goto error; } } else if (IS_ERR(uid_keyring_r)) { ret = PTR_ERR(uid_keyring_r); goto error; } else { uid_keyring = key_ref_to_ptr(uid_keyring_r); } /* Get a default session keyring (which might also exist already) */ snprintf(buf, sizeof(buf), "_uid_ses.%u", uid); session_keyring_r = keyring_search(make_key_ref(reg_keyring, true), &key_type_keyring, buf, false); kdebug("_uid_ses %p", session_keyring_r); if (session_keyring_r == ERR_PTR(-EAGAIN)) { session_keyring = keyring_alloc(buf, cred->user->uid, INVALID_GID, cred, user_keyring_perm, KEY_ALLOC_UID_KEYRING | KEY_ALLOC_IN_QUOTA, NULL, NULL); if (IS_ERR(session_keyring)) { ret = PTR_ERR(session_keyring); goto error_release; } /* We install a link from the user session keyring to * the user keyring. */ ret = key_link(session_keyring, uid_keyring); if (ret < 0) goto error_release_session; /* And only then link the user-session keyring to the * register. */ ret = key_link(reg_keyring, session_keyring); if (ret < 0) goto error_release_session; } else if (IS_ERR(session_keyring_r)) { ret = PTR_ERR(session_keyring_r); goto error_release; } else { session_keyring = key_ref_to_ptr(session_keyring_r); } up_write(&user_ns->keyring_sem); if (_user_session_keyring) *_user_session_keyring = session_keyring; else key_put(session_keyring); if (_user_keyring) *_user_keyring = uid_keyring; else key_put(uid_keyring); kleave(" = 0"); return 0; error_release_session: key_put(session_keyring); error_release: key_put(uid_keyring); error: up_write(&user_ns->keyring_sem); kleave(" = %d", ret); return ret; } /* * Get the user session keyring if it exists, but don't create it if it * doesn't. */ struct key *get_user_session_keyring_rcu(const struct cred *cred) { struct key *reg_keyring = READ_ONCE(cred->user_ns->user_keyring_register); key_ref_t session_keyring_r; char buf[20]; struct keyring_search_context ctx = { .index_key.type = &key_type_keyring, .index_key.description = buf, .cred = cred, .match_data.cmp = key_default_cmp, .match_data.raw_data = buf, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .flags = KEYRING_SEARCH_DO_STATE_CHECK, }; if (!reg_keyring) return NULL; ctx.index_key.desc_len = snprintf(buf, sizeof(buf), "_uid_ses.%u", from_kuid(cred->user_ns, cred->user->uid)); session_keyring_r = keyring_search_rcu(make_key_ref(reg_keyring, true), &ctx); if (IS_ERR(session_keyring_r)) return NULL; return key_ref_to_ptr(session_keyring_r); } /* * Install a thread keyring to the given credentials struct if it didn't have * one already. This is allowed to overrun the quota. * * Return: 0 if a thread keyring is now present; -errno on failure. */ int install_thread_keyring_to_cred(struct cred *new) { struct key *keyring; if (new->thread_keyring) return 0; keyring = keyring_alloc("_tid", new->uid, new->gid, new, KEY_POS_ALL | KEY_USR_VIEW, KEY_ALLOC_QUOTA_OVERRUN, NULL, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); new->thread_keyring = keyring; return 0; } /* * Install a thread keyring to the current task if it didn't have one already. * * Return: 0 if a thread keyring is now present; -errno on failure. 
*/ static int install_thread_keyring(void) { struct cred *new; int ret; new = prepare_creds(); if (!new) return -ENOMEM; ret = install_thread_keyring_to_cred(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); } /* * Install a process keyring to the given credentials struct if it didn't have * one already. This is allowed to overrun the quota. * * Return: 0 if a process keyring is now present; -errno on failure. */ int install_process_keyring_to_cred(struct cred *new) { struct key *keyring; if (new->process_keyring) return 0; keyring = keyring_alloc("_pid", new->uid, new->gid, new, KEY_POS_ALL | KEY_USR_VIEW, KEY_ALLOC_QUOTA_OVERRUN, NULL, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); new->process_keyring = keyring; return 0; } /* * Install a process keyring to the current task if it didn't have one already. * * Return: 0 if a process keyring is now present; -errno on failure. */ static int install_process_keyring(void) { struct cred *new; int ret; new = prepare_creds(); if (!new) return -ENOMEM; ret = install_process_keyring_to_cred(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); } /* * Install the given keyring as the session keyring of the given credentials * struct, replacing the existing one if any. If the given keyring is NULL, * then install a new anonymous session keyring. * @cred can not be in use by any task yet. * * Return: 0 on success; -errno on failure. */ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) { unsigned long flags; struct key *old; might_sleep(); /* create an empty session keyring */ if (!keyring) { flags = KEY_ALLOC_QUOTA_OVERRUN; if (cred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; keyring = keyring_alloc("_ses", cred->uid, cred->gid, cred, KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ, flags, NULL, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); } else { __key_get(keyring); } /* install the keyring */ old = cred->session_keyring; cred->session_keyring = keyring; if (old) key_put(old); return 0; } /* * Install the given keyring as the session keyring of the current task, * replacing the existing one if any. If the given keyring is NULL, then * install a new anonymous session keyring. * * Return: 0 on success; -errno on failure. */ static int install_session_keyring(struct key *keyring) { struct cred *new; int ret; new = prepare_creds(); if (!new) return -ENOMEM; ret = install_session_keyring_to_cred(new, keyring); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); } /* * Handle the fsuid changing. */ void key_fsuid_changed(struct cred *new_cred) { /* update the ownership of the thread keyring */ if (new_cred->thread_keyring) { down_write(&new_cred->thread_keyring->sem); new_cred->thread_keyring->uid = new_cred->fsuid; up_write(&new_cred->thread_keyring->sem); } } /* * Handle the fsgid changing. */ void key_fsgid_changed(struct cred *new_cred) { /* update the ownership of the thread keyring */ if (new_cred->thread_keyring) { down_write(&new_cred->thread_keyring->sem); new_cred->thread_keyring->gid = new_cred->fsgid; up_write(&new_cred->thread_keyring->sem); } } /* * Search the process keyrings attached to the supplied cred for the first * matching key under RCU conditions (the caller must be holding the RCU read * lock). * * The search criteria are the type and the match function. The description is * given to the match function as a parameter, but doesn't otherwise influence * the search. 
Typically the match function will compare the description * parameter to the key's description. * * This can only search keyrings that grant Search permission to the supplied * credentials. Keyrings linked to searched keyrings will also be searched if * they grant Search permission too. Keys can only be found if they grant * Search permission to the credentials. * * Returns a pointer to the key with the key usage count incremented if * successful, -EAGAIN if we didn't find any matching key or -ENOKEY if we only * matched negative keys. * * In the case of a successful return, the possession attribute is set on the * returned key reference. */ key_ref_t search_cred_keyrings_rcu(struct keyring_search_context *ctx) { struct key *user_session; key_ref_t key_ref, ret, err; const struct cred *cred = ctx->cred; /* we want to return -EAGAIN or -ENOKEY if any of the keyrings were * searchable, but we failed to find a key or we found a negative key; * otherwise we want to return a sample error (probably -EACCES) if * none of the keyrings were searchable * * in terms of priority: success > -ENOKEY > -EAGAIN > other error */ key_ref = NULL; ret = NULL; err = ERR_PTR(-EAGAIN); /* search the thread keyring first */ if (cred->thread_keyring) { key_ref = keyring_search_rcu( make_key_ref(cred->thread_keyring, 1), ctx); if (!IS_ERR(key_ref)) goto found; switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ case -ENOKEY: /* negative key */ ret = key_ref; break; default: err = key_ref; break; } } /* search the process keyring second */ if (cred->process_keyring) { key_ref = keyring_search_rcu( make_key_ref(cred->process_keyring, 1), ctx); if (!IS_ERR(key_ref)) goto found; switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ if (ret) break; fallthrough; case -ENOKEY: /* negative key */ ret = key_ref; break; default: err = key_ref; break; } } /* search the session keyring */ if (cred->session_keyring) { key_ref = keyring_search_rcu( make_key_ref(cred->session_keyring, 1), ctx); if (!IS_ERR(key_ref)) goto found; switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ if (ret) break; fallthrough; case -ENOKEY: /* negative key */ ret = key_ref; break; default: err = key_ref; break; } } /* or search the user-session keyring */ else if ((user_session = get_user_session_keyring_rcu(cred))) { key_ref = keyring_search_rcu(make_key_ref(user_session, 1), ctx); key_put(user_session); if (!IS_ERR(key_ref)) goto found; switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ if (ret) break; fallthrough; case -ENOKEY: /* negative key */ ret = key_ref; break; default: err = key_ref; break; } } /* no key - decide on the error we're going to go for */ key_ref = ret ? ret : err; found: return key_ref; } /* * Search the process keyrings attached to the supplied cred for the first * matching key in the manner of search_my_process_keyrings(), but also search * the keys attached to the assumed authorisation key using its credentials if * one is available. * * The caller must be holding the RCU read lock. * * Return same as search_cred_keyrings_rcu(). 
*/ key_ref_t search_process_keyrings_rcu(struct keyring_search_context *ctx) { struct request_key_auth *rka; key_ref_t key_ref, ret = ERR_PTR(-EACCES), err; key_ref = search_cred_keyrings_rcu(ctx); if (!IS_ERR(key_ref)) goto found; err = key_ref; /* if this process has an instantiation authorisation key, then we also * search the keyrings of the process mentioned there * - we don't permit access to request_key auth keys via this method */ if (ctx->cred->request_key_auth && ctx->cred == current_cred() && ctx->index_key.type != &key_type_request_key_auth ) { const struct cred *cred = ctx->cred; if (key_validate(cred->request_key_auth) == 0) { rka = ctx->cred->request_key_auth->payload.data[0]; //// was search_process_keyrings() [ie. recursive] ctx->cred = rka->cred; key_ref = search_cred_keyrings_rcu(ctx); ctx->cred = cred; if (!IS_ERR(key_ref)) goto found; ret = key_ref; } } /* no key - decide on the error we're going to go for */ if (err == ERR_PTR(-ENOKEY) || ret == ERR_PTR(-ENOKEY)) key_ref = ERR_PTR(-ENOKEY); else if (err == ERR_PTR(-EACCES)) key_ref = ret; else key_ref = err; found: return key_ref; } /* * See if the key we're looking at is the target key. */ bool lookup_user_key_possessed(const struct key *key, const struct key_match_data *match_data) { return key == match_data->raw_data; } /* * Look up a key ID given us by userspace with a given permissions mask to get * the key it refers to. * * Flags can be passed to request that special keyrings be created if referred * to directly, to permit partially constructed keys to be found and to skip * validity and permission checks on the found key. * * Returns a pointer to the key with an incremented usage count if successful; * -EINVAL if the key ID is invalid; -ENOKEY if the key ID does not correspond * to a key or the best found key was a negative key; -EKEYREVOKED or * -EKEYEXPIRED if the best found key was revoked or expired; -EACCES if the * found key doesn't grant the requested permit or the LSM denied access to it; * or -ENOMEM if a special keyring couldn't be created. * * In the case of a successful return, the possession attribute is set on the * returned key reference. 
*/ key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags, enum key_need_perm need_perm) { struct keyring_search_context ctx = { .match_data.cmp = lookup_user_key_possessed, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .flags = (KEYRING_SEARCH_NO_STATE_CHECK | KEYRING_SEARCH_RECURSE), }; struct request_key_auth *rka; struct key *key, *user_session; key_ref_t key_ref, skey_ref; int ret; try_again: ctx.cred = get_current_cred(); key_ref = ERR_PTR(-ENOKEY); switch (id) { case KEY_SPEC_THREAD_KEYRING: if (!ctx.cred->thread_keyring) { if (!(lflags & KEY_LOOKUP_CREATE)) goto error; ret = install_thread_keyring(); if (ret < 0) { key_ref = ERR_PTR(ret); goto error; } goto reget_creds; } key = ctx.cred->thread_keyring; __key_get(key); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_PROCESS_KEYRING: if (!ctx.cred->process_keyring) { if (!(lflags & KEY_LOOKUP_CREATE)) goto error; ret = install_process_keyring(); if (ret < 0) { key_ref = ERR_PTR(ret); goto error; } goto reget_creds; } key = ctx.cred->process_keyring; __key_get(key); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: if (!ctx.cred->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ ret = look_up_user_keyrings(NULL, &user_session); if (ret < 0) goto error; if (lflags & KEY_LOOKUP_CREATE) ret = join_session_keyring(NULL); else ret = install_session_keyring(user_session); key_put(user_session); if (ret < 0) goto error; goto reget_creds; } else if (test_bit(KEY_FLAG_UID_KEYRING, &ctx.cred->session_keyring->flags) && lflags & KEY_LOOKUP_CREATE) { ret = join_session_keyring(NULL); if (ret < 0) goto error; goto reget_creds; } key = ctx.cred->session_keyring; __key_get(key); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_KEYRING: ret = look_up_user_keyrings(&key, NULL); if (ret < 0) goto error; key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: ret = look_up_user_keyrings(NULL, &key); if (ret < 0) goto error; key_ref = make_key_ref(key, 1); break; case KEY_SPEC_GROUP_KEYRING: /* group keyrings are not yet supported */ key_ref = ERR_PTR(-EINVAL); goto error; case KEY_SPEC_REQKEY_AUTH_KEY: key = ctx.cred->request_key_auth; if (!key) goto error; __key_get(key); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_REQUESTOR_KEYRING: if (!ctx.cred->request_key_auth) goto error; down_read(&ctx.cred->request_key_auth->sem); if (test_bit(KEY_FLAG_REVOKED, &ctx.cred->request_key_auth->flags)) { key_ref = ERR_PTR(-EKEYREVOKED); key = NULL; } else { rka = ctx.cred->request_key_auth->payload.data[0]; key = rka->dest_keyring; __key_get(key); } up_read(&ctx.cred->request_key_auth->sem); if (!key) goto error; key_ref = make_key_ref(key, 1); break; default: key_ref = ERR_PTR(-EINVAL); if (id < 1) goto error; key = key_lookup(id); if (IS_ERR(key)) { key_ref = ERR_CAST(key); goto error; } key_ref = make_key_ref(key, 0); /* check to see if we possess the key */ ctx.index_key = key->index_key; ctx.match_data.raw_data = key; kdebug("check possessed"); rcu_read_lock(); skey_ref = search_process_keyrings_rcu(&ctx); rcu_read_unlock(); kdebug("possessed=%p", skey_ref); if (!IS_ERR(skey_ref)) { key_put(key); key_ref = skey_ref; } break; } /* unlink does not use the nominated key in any way, so can skip all * the permission checks as it is only concerned with the keyring */ if (need_perm != KEY_NEED_UNLINK) { if (!(lflags & KEY_LOOKUP_PARTIAL)) { ret = wait_for_key_construction(key, true); switch (ret) { case -ERESTARTSYS: goto invalid_key; 
default: if (need_perm != KEY_AUTHTOKEN_OVERRIDE && need_perm != KEY_DEFER_PERM_CHECK) goto invalid_key; break; case 0: break; } } else if (need_perm != KEY_DEFER_PERM_CHECK) { ret = key_validate(key); if (ret < 0) goto invalid_key; } ret = -EIO; if (!(lflags & KEY_LOOKUP_PARTIAL) && key_read_state(key) == KEY_IS_UNINSTANTIATED) goto invalid_key; } /* check the permissions */ ret = key_task_permission(key_ref, ctx.cred, need_perm); if (ret < 0) goto invalid_key; key->last_used_at = ktime_get_real_seconds(); error: put_cred(ctx.cred); return key_ref; invalid_key: key_ref_put(key_ref); key_ref = ERR_PTR(ret); goto error; /* if we attempted to install a keyring, then it may have caused new * creds to be installed */ reget_creds: put_cred(ctx.cred); goto try_again; } EXPORT_SYMBOL(lookup_user_key); /* * Join the named keyring as the session keyring if possible else attempt to * create a new one of that name and join that. * * If the name is NULL, an empty anonymous keyring will be installed as the * session keyring. * * Named session keyrings are joined with a semaphore held to prevent the * keyrings from going away whilst the attempt is made to going them and also * to prevent a race in creating compatible session keyrings. */ long join_session_keyring(const char *name) { const struct cred *old; struct cred *new; struct key *keyring; long ret, serial; new = prepare_creds(); if (!new) return -ENOMEM; old = current_cred(); /* if no name is provided, install an anonymous keyring */ if (!name) { ret = install_session_keyring_to_cred(new, NULL); if (ret < 0) goto error; serial = new->session_keyring->serial; ret = commit_creds(new); if (ret == 0) ret = serial; goto okay; } /* allow the user to join or create a named keyring */ mutex_lock(&key_session_mutex); /* look for an existing keyring of this name */ keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ keyring = keyring_alloc( name, old->uid, old->gid, old, KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_LINK, KEY_ALLOC_IN_QUOTA, NULL, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; } } else if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; } else if (keyring == new->session_keyring) { ret = 0; goto error3; } /* we've got a keyring - now to install it */ ret = install_session_keyring_to_cred(new, keyring); if (ret < 0) goto error3; commit_creds(new); mutex_unlock(&key_session_mutex); ret = keyring->serial; key_put(keyring); okay: return ret; error3: key_put(keyring); error2: mutex_unlock(&key_session_mutex); error: abort_creds(new); return ret; } /* * Replace a process's session keyring on behalf of one of its children when * the target process is about to resume userspace execution. 
*/ void key_change_session_keyring(struct callback_head *twork) { const struct cred *old = current_cred(); struct cred *new = container_of(twork, struct cred, rcu); if (unlikely(current->flags & PF_EXITING)) { put_cred(new); return; } /* If get_ucounts fails more bits are needed in the refcount */ if (unlikely(!get_ucounts(old->ucounts))) { WARN_ONCE(1, "In %s get_ucounts failed\n", __func__); put_cred(new); return; } new-> uid = old-> uid; new-> euid = old-> euid; new-> suid = old-> suid; new->fsuid = old->fsuid; new-> gid = old-> gid; new-> egid = old-> egid; new-> sgid = old-> sgid; new->fsgid = old->fsgid; new->user = get_uid(old->user); new->ucounts = old->ucounts; new->user_ns = get_user_ns(old->user_ns); new->group_info = get_group_info(old->group_info); new->securebits = old->securebits; new->cap_inheritable = old->cap_inheritable; new->cap_permitted = old->cap_permitted; new->cap_effective = old->cap_effective; new->cap_ambient = old->cap_ambient; new->cap_bset = old->cap_bset; new->jit_keyring = old->jit_keyring; new->thread_keyring = key_get(old->thread_keyring); new->process_keyring = key_get(old->process_keyring); security_transfer_creds(new, old); commit_creds(new); } /* * Make sure that root's user and user-session keyrings exist. */ static int __init init_root_keyring(void) { return look_up_user_keyrings(NULL, NULL); } late_initcall(init_root_keyring); |
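The join_session_keyring() path above is what backs the KEYCTL_JOIN_SESSION_KEYRING operation. A minimal userspace sketch (an illustration, not part of this file) that exercises it through the raw keyctl(2) syscall follows; the keyring name "_example" is an arbitrary choice for the sketch, and passing NULL instead would install a fresh anonymous session keyring.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/keyctl.h>

int main(void)
{
	/* Join (or create and join) a session keyring named "_example". */
	long serial = syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING,
			      "_example");

	if (serial < 0) {
		perror("keyctl(KEYCTL_JOIN_SESSION_KEYRING)");
		return 1;
	}
	/* On success the serial number of the joined keyring is returned. */
	printf("joined session keyring, serial %ld\n", serial);
	return 0;
}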
| 19 10 5 6 2 5 5 12 6 12 12 8 8 4 8 8 8 3 3 10 10 7 10 10 10 10 10 14 14 1 1 6 6 6 6 6 7 7 7 1 12 2 11 6 6 6 6 12 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 13 13 13 13 1 13 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 | // SPDX-License-Identifier: GPL-2.0-or-later /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> */ #include <net/inet_dscp.h> #include <net/ip.h> #include "ipvlan.h" static u32 ipvlan_jhash_secret __read_mostly; void ipvlan_init_secret(void) { net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret)); } void 
ipvlan_count_rx(const struct ipvl_dev *ipvlan, unsigned int len, bool success, bool mcast) { if (likely(success)) { struct ipvl_pcpu_stats *pcptr; pcptr = this_cpu_ptr(ipvlan->pcpu_stats); u64_stats_update_begin(&pcptr->syncp); u64_stats_inc(&pcptr->rx_pkts); u64_stats_add(&pcptr->rx_bytes, len); if (mcast) u64_stats_inc(&pcptr->rx_mcast); u64_stats_update_end(&pcptr->syncp); } else { this_cpu_inc(ipvlan->pcpu_stats->rx_errs); } } EXPORT_SYMBOL_GPL(ipvlan_count_rx); #if IS_ENABLED(CONFIG_IPV6) static u8 ipvlan_get_v6_hash(const void *iaddr) { const struct in6_addr *ip6_addr = iaddr; return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) & IPVLAN_HASH_MASK; } #else static u8 ipvlan_get_v6_hash(const void *iaddr) { return 0; } #endif static u8 ipvlan_get_v4_hash(const void *iaddr) { const struct in_addr *ip4_addr = iaddr; return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) & IPVLAN_HASH_MASK; } static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr) { if (!is_v6 && addr->atype == IPVL_IPV4) { struct in_addr *i4addr = (struct in_addr *)iaddr; return addr->ip4addr.s_addr == i4addr->s_addr; #if IS_ENABLED(CONFIG_IPV6) } else if (is_v6 && addr->atype == IPVL_IPV6) { struct in6_addr *i6addr = (struct in6_addr *)iaddr; return ipv6_addr_equal(&addr->ip6addr, i6addr); #endif } return false; } static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, const void *iaddr, bool is_v6) { struct ipvl_addr *addr; u8 hash; hash = is_v6 ? ipvlan_get_v6_hash(iaddr) : ipvlan_get_v4_hash(iaddr); hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) if (addr_equal(is_v6, addr, iaddr)) return addr; return NULL; } void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr) { struct ipvl_port *port = ipvlan->port; u8 hash; hash = (addr->atype == IPVL_IPV6) ? 
ipvlan_get_v6_hash(&addr->ip6addr) : ipvlan_get_v4_hash(&addr->ip4addr); if (hlist_unhashed(&addr->hlnode)) hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]); } void ipvlan_ht_addr_del(struct ipvl_addr *addr) { hlist_del_init_rcu(&addr->hlnode); } struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6) { struct ipvl_addr *addr, *ret = NULL; rcu_read_lock(); list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) { if (addr_equal(is_v6, addr, iaddr)) { ret = addr; break; } } rcu_read_unlock(); return ret; } bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6) { struct ipvl_dev *ipvlan; bool ret = false; rcu_read_lock(); list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) { ret = true; break; } } rcu_read_unlock(); return ret; } void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type) { void *lyr3h = NULL; switch (skb->protocol) { case htons(ETH_P_ARP): { struct arphdr *arph; if (unlikely(!pskb_may_pull(skb, arp_hdr_len(port->dev)))) return NULL; arph = arp_hdr(skb); *type = IPVL_ARP; lyr3h = arph; break; } case htons(ETH_P_IP): { u32 pktlen; struct iphdr *ip4h; if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h)))) return NULL; ip4h = ip_hdr(skb); pktlen = skb_ip_totlen(skb); if (ip4h->ihl < 5 || ip4h->version != 4) return NULL; if (skb->len < pktlen || pktlen < (ip4h->ihl * 4)) return NULL; *type = IPVL_IPV4; lyr3h = ip4h; break; } #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): { struct ipv6hdr *ip6h; if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h)))) return NULL; ip6h = ipv6_hdr(skb); if (ip6h->version != 6) return NULL; *type = IPVL_IPV6; lyr3h = ip6h; /* Only Neighbour Solicitation pkts need different treatment */ if (ipv6_addr_any(&ip6h->saddr) && ip6h->nexthdr == NEXTHDR_ICMP) { struct icmp6hdr *icmph; if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph)))) return NULL; ip6h = ipv6_hdr(skb); icmph = (struct icmp6hdr *)(ip6h + 1); if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) { /* Need to access the ipv6 address in body */ if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph) + sizeof(struct in6_addr)))) return NULL; ip6h = ipv6_hdr(skb); icmph = (struct icmp6hdr *)(ip6h + 1); } *type = IPVL_ICMPV6; lyr3h = icmph; } break; } #endif default: return NULL; } return lyr3h; } unsigned int ipvlan_mac_hash(const unsigned char *addr) { u32 hash = jhash_1word(get_unaligned((u32 *)(addr + 2)), ipvlan_jhash_secret); return hash & IPVLAN_MAC_FILTER_MASK; } void ipvlan_process_multicast(struct work_struct *work) { struct ipvl_port *port = container_of(work, struct ipvl_port, wq); struct ethhdr *ethh; struct ipvl_dev *ipvlan; struct sk_buff *skb, *nskb; struct sk_buff_head list; unsigned int len; unsigned int mac_hash; int ret; u8 pkt_type; bool tx_pkt; __skb_queue_head_init(&list); spin_lock_bh(&port->backlog.lock); skb_queue_splice_tail_init(&port->backlog, &list); spin_unlock_bh(&port->backlog.lock); while ((skb = __skb_dequeue(&list)) != NULL) { struct net_device *dev = skb->dev; bool consumed = false; ethh = eth_hdr(skb); tx_pkt = IPVL_SKB_CB(skb)->tx_pkt; mac_hash = ipvlan_mac_hash(ethh->h_dest); if (ether_addr_equal(ethh->h_dest, port->dev->broadcast)) pkt_type = PACKET_BROADCAST; else pkt_type = PACKET_MULTICAST; rcu_read_lock(); list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { if (tx_pkt && (ipvlan->dev == skb->dev)) continue; if (!test_bit(mac_hash, ipvlan->mac_filters)) continue; if (!(ipvlan->dev->flags & IFF_UP)) 
continue; ret = NET_RX_DROP; len = skb->len + ETH_HLEN; nskb = skb_clone(skb, GFP_ATOMIC); local_bh_disable(); if (nskb) { consumed = true; nskb->pkt_type = pkt_type; nskb->dev = ipvlan->dev; if (tx_pkt) ret = dev_forward_skb(ipvlan->dev, nskb); else ret = netif_rx(nskb); } ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); local_bh_enable(); } rcu_read_unlock(); if (tx_pkt) { /* If the packet originated here, send it out. */ skb->dev = port->dev; skb->pkt_type = pkt_type; dev_queue_xmit(skb); } else { if (consumed) consume_skb(skb); else kfree_skb(skb); } dev_put(dev); cond_resched(); } } static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev) { bool xnet = true; if (dev) xnet = !net_eq(dev_net(skb->dev), dev_net(dev)); skb_scrub_packet(skb, xnet); if (dev) skb->dev = dev; } static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb, bool local) { struct ipvl_dev *ipvlan = addr->master; struct net_device *dev = ipvlan->dev; unsigned int len; rx_handler_result_t ret = RX_HANDLER_CONSUMED; bool success = false; struct sk_buff *skb = *pskb; len = skb->len + ETH_HLEN; /* Only packets exchanged between two local slaves need to have * device-up check as well as skb-share check. */ if (local) { if (unlikely(!(dev->flags & IFF_UP))) { kfree_skb(skb); goto out; } skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto out; *pskb = skb; } if (local) { skb->pkt_type = PACKET_HOST; if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS) success = true; } else { skb->dev = dev; ret = RX_HANDLER_ANOTHER; success = true; } out: ipvlan_count_rx(ipvlan, len, success, false); return ret; } struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, int addr_type, bool use_dest) { struct ipvl_addr *addr = NULL; switch (addr_type) { #if IS_ENABLED(CONFIG_IPV6) case IPVL_IPV6: { struct ipv6hdr *ip6h; struct in6_addr *i6addr; ip6h = (struct ipv6hdr *)lyr3h; i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr; addr = ipvlan_ht_addr_lookup(port, i6addr, true); break; } case IPVL_ICMPV6: { struct nd_msg *ndmh; struct in6_addr *i6addr; /* Make sure that the NeighborSolicitation ICMPv6 packets * are handled to avoid DAD issue. */ ndmh = (struct nd_msg *)lyr3h; if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) { i6addr = &ndmh->target; addr = ipvlan_ht_addr_lookup(port, i6addr, true); } break; } #endif case IPVL_IPV4: { struct iphdr *ip4h; __be32 *i4addr; ip4h = (struct iphdr *)lyr3h; i4addr = use_dest ? 
&ip4h->daddr : &ip4h->saddr; addr = ipvlan_ht_addr_lookup(port, i4addr, false); break; } case IPVL_ARP: { struct arphdr *arph; unsigned char *arp_ptr; __be32 dip; arph = (struct arphdr *)lyr3h; arp_ptr = (unsigned char *)(arph + 1); if (use_dest) arp_ptr += (2 * port->dev->addr_len) + 4; else arp_ptr += port->dev->addr_len; memcpy(&dip, arp_ptr, 4); addr = ipvlan_ht_addr_lookup(port, &dip, false); break; } } return addr; } static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) { struct net_device *dev = skb->dev; struct net *net = dev_net(dev); int err, ret = NET_XMIT_DROP; const struct iphdr *ip4h; struct rtable *rt; struct flowi4 fl4 = { .flowi4_oif = dev->ifindex, .flowi4_flags = FLOWI_FLAG_ANYSRC, .flowi4_mark = skb->mark, }; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) goto err; ip4h = ip_hdr(skb); fl4.daddr = ip4h->daddr; fl4.saddr = ip4h->saddr; fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)); rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto err; if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { ip_rt_put(rt); goto err; } skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); err = ip_local_out(net, NULL, skb); if (unlikely(net_xmit_eval(err))) DEV_STATS_INC(dev, tx_errors); else ret = NET_XMIT_SUCCESS; goto out; err: DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); out: return ret; } #if IS_ENABLED(CONFIG_IPV6) static noinline_for_stack int ipvlan_route_v6_outbound(struct net_device *dev, struct sk_buff *skb) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, .daddr = ip6h->daddr, .saddr = ip6h->saddr, .flowi6_flags = FLOWI_FLAG_ANYSRC, .flowlabel = ip6_flowinfo(ip6h), .flowi6_mark = skb->mark, .flowi6_proto = ip6h->nexthdr, }; struct dst_entry *dst; int err; dst = ip6_route_output(dev_net(dev), NULL, &fl6); err = dst->error; if (err) { dst_release(dst); return err; } skb_dst_set(skb, dst); return 0; } static int ipvlan_process_v6_outbound(struct sk_buff *skb) { struct net_device *dev = skb->dev; int err, ret = NET_XMIT_DROP; if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) { DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); return ret; } err = ipvlan_route_v6_outbound(dev, skb); if (unlikely(err)) { DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); return err; } memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); err = ip6_local_out(dev_net(dev), NULL, skb); if (unlikely(net_xmit_eval(err))) DEV_STATS_INC(dev, tx_errors); else ret = NET_XMIT_SUCCESS; return ret; } #else static int ipvlan_process_v6_outbound(struct sk_buff *skb) { return NET_XMIT_DROP; } #endif static int ipvlan_process_outbound(struct sk_buff *skb) { int ret = NET_XMIT_DROP; /* The ipvlan is a pseudo-L2 device, so the packets that we receive * will have L2; which need to discarded and processed further * in the net-ns of the main-device. 
*/ if (skb_mac_header_was_set(skb)) { /* In this mode we dont care about * multicast and broadcast traffic */ struct ethhdr *ethh = eth_hdr(skb); if (is_multicast_ether_addr(ethh->h_dest)) { pr_debug_ratelimited( "Dropped {multi|broad}cast of type=[%x]\n", ntohs(skb->protocol)); kfree_skb(skb); goto out; } skb_pull(skb, sizeof(*ethh)); skb->mac_header = (typeof(skb->mac_header))~0U; skb_reset_network_header(skb); } if (skb->protocol == htons(ETH_P_IPV6)) ret = ipvlan_process_v6_outbound(skb); else if (skb->protocol == htons(ETH_P_IP)) ret = ipvlan_process_v4_outbound(skb); else { pr_warn_ratelimited("Dropped outbound packet type=%x\n", ntohs(skb->protocol)); kfree_skb(skb); } out: return ret; } static void ipvlan_multicast_enqueue(struct ipvl_port *port, struct sk_buff *skb, bool tx_pkt) { if (skb->protocol == htons(ETH_P_PAUSE)) { kfree_skb(skb); return; } /* Record that the deferred packet is from TX or RX path. By * looking at mac-addresses on packet will lead to erronus decisions. * (This would be true for a loopback-mode on master device or a * hair-pin mode of the switch.) */ IPVL_SKB_CB(skb)->tx_pkt = tx_pkt; spin_lock(&port->backlog.lock); if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) { dev_hold(skb->dev); __skb_queue_tail(&port->backlog, skb); spin_unlock(&port->backlog.lock); schedule_work(&port->wq); } else { spin_unlock(&port->backlog.lock); dev_core_stats_rx_dropped_inc(skb->dev); kfree_skb(skb); } } static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) { const struct ipvl_dev *ipvlan = netdev_priv(dev); void *lyr3h; struct ipvl_addr *addr; int addr_type; lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type); if (!lyr3h) goto out; if (!ipvlan_is_vepa(ipvlan->port)) { addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); if (addr) { if (ipvlan_is_private(ipvlan->port)) { consume_skb(skb); return NET_XMIT_DROP; } ipvlan_rcv_frame(addr, &skb, true); return NET_XMIT_SUCCESS; } } out: ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev); return ipvlan_process_outbound(skb); } static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) { const struct ipvl_dev *ipvlan = netdev_priv(dev); struct ethhdr *eth = skb_eth_hdr(skb); struct ipvl_addr *addr; void *lyr3h; int addr_type; if (!ipvlan_is_vepa(ipvlan->port) && ether_addr_equal(eth->h_dest, eth->h_source)) { lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type); if (lyr3h) { addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); if (addr) { if (ipvlan_is_private(ipvlan->port)) { consume_skb(skb); return NET_XMIT_DROP; } ipvlan_rcv_frame(addr, &skb, true); return NET_XMIT_SUCCESS; } } skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) return NET_XMIT_DROP; /* Packet definitely does not belong to any of the * virtual devices, but the dest is local. So forward * the skb for the main-dev. At the RX side we just return * RX_PASS for it to be processed further on the stack. 
*/ dev_forward_skb(ipvlan->phy_dev, skb); return NET_XMIT_SUCCESS; } else if (is_multicast_ether_addr(eth->h_dest)) { skb_reset_mac_header(skb); ipvlan_skb_crossing_ns(skb, NULL); ipvlan_multicast_enqueue(ipvlan->port, skb, true); return NET_XMIT_SUCCESS; } skb->dev = ipvlan->phy_dev; return dev_queue_xmit(skb); } int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev); if (!port) goto out; if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) goto out; switch(port->mode) { case IPVLAN_MODE_L2: return ipvlan_xmit_mode_l2(skb, dev); case IPVLAN_MODE_L3: #ifdef CONFIG_IPVLAN_L3S case IPVLAN_MODE_L3S: #endif return ipvlan_xmit_mode_l3(skb, dev); } /* Should not reach here */ WARN_ONCE(true, "%s called for mode = [%x]\n", __func__, port->mode); out: kfree_skb(skb); return NET_XMIT_DROP; } static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port) { struct ethhdr *eth = eth_hdr(skb); struct ipvl_addr *addr; void *lyr3h; int addr_type; if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) { lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); if (!lyr3h) return true; addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false); if (addr) return false; } return true; } static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb, struct ipvl_port *port) { void *lyr3h; int addr_type; struct ipvl_addr *addr; struct sk_buff *skb = *pskb; rx_handler_result_t ret = RX_HANDLER_PASS; lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); if (!lyr3h) goto out; addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); if (addr) ret = ipvlan_rcv_frame(addr, pskb, false); out: return ret; } static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, struct ipvl_port *port) { struct sk_buff *skb = *pskb; struct ethhdr *eth = eth_hdr(skb); rx_handler_result_t ret = RX_HANDLER_PASS; if (is_multicast_ether_addr(eth->h_dest)) { if (ipvlan_external_frame(skb, port)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); /* External frames are queued for device local * distribution, but a copy is given to master * straight away to avoid sending duplicates later * when work-queue processes this frame. This is * achieved by returning RX_HANDLER_PASS. */ if (nskb) { ipvlan_skb_crossing_ns(nskb, NULL); ipvlan_multicast_enqueue(port, nskb, false); } } } else { /* Perform like l3 mode for non-multicast packet */ ret = ipvlan_handle_mode_l3(pskb, port); } return ret; } rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev); if (!port) return RX_HANDLER_PASS; switch (port->mode) { case IPVLAN_MODE_L2: return ipvlan_handle_mode_l2(pskb, port); case IPVLAN_MODE_L3: return ipvlan_handle_mode_l3(pskb, port); #ifdef CONFIG_IPVLAN_L3S case IPVLAN_MODE_L3S: return RX_HANDLER_PASS; #endif } /* Should not reach here */ WARN_ONCE(true, "%s called for mode = [%x]\n", __func__, port->mode); kfree_skb(skb); return RX_HANDLER_CONSUMED; } |
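The per-port address hash used by ipvlan_ht_addr_lookup() keys IPv4 addresses with jhash_1word(addr, ipvlan_jhash_secret) & IPVLAN_HASH_MASK. The sketch below re-derives that bucket computation in userspace. It is an assumption-laden illustration only: the jhash_1word() body is transcribed from include/linux/jhash.h, the fixed secret stands in for the boot-time random ipvlan_jhash_secret, and the 0xff mask stands in for IPVLAN_HASH_MASK, so it shows the mechanism rather than predicting the kernel's actual bucket.

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

#define JHASH_INITVAL 0xdeadbeef

static uint32_t rol32(uint32_t x, int r)
{
	return (x << r) | (x >> (32 - r));
}

/* One-word jhash, following the kernel's jhash_1word()/__jhash_nwords(). */
static uint32_t jhash_1word(uint32_t a, uint32_t initval)
{
	uint32_t b = 0, c = initval;

	a += JHASH_INITVAL;
	b += JHASH_INITVAL;

	/* final mixing step (lookup3 final()) */
	c ^= b; c -= rol32(b, 14);
	a ^= c; a -= rol32(c, 11);
	b ^= a; b -= rol32(a, 25);
	c ^= b; c -= rol32(b, 16);
	a ^= c; a -= rol32(c, 4);
	b ^= a; b -= rol32(a, 14);
	c ^= b; c -= rol32(b, 24);
	return c;
}

int main(void)
{
	uint32_t secret = 0x12345678;	/* stand-in for ipvlan_jhash_secret */
	struct in_addr ip;

	/* s_addr is network byte order, same as what ipvlan_get_v4_hash() hashes. */
	inet_pton(AF_INET, "192.0.2.1", &ip);
	printf("bucket = %u\n", (unsigned)(jhash_1word(ip.s_addr, secret) & 0xff));
	return 0;
}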
| 9 9 3 3 1 1 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 | // SPDX-License-Identifier: GPL-2.0-only #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/module.h> #include <net/ip.h> #include <linux/ipv6.h> #include <linux/icmp.h> #include <net/ipv6.h> #include <net/tcp.h> #include <net/udp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_tcpudp.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> MODULE_DESCRIPTION("Xtables: TCP, UDP and UDP-Lite match"); MODULE_LICENSE("GPL"); MODULE_ALIAS("xt_tcp"); MODULE_ALIAS("xt_udp"); MODULE_ALIAS("ipt_udp"); MODULE_ALIAS("ipt_tcp"); MODULE_ALIAS("ip6t_udp"); MODULE_ALIAS("ip6t_tcp"); MODULE_ALIAS("ipt_icmp"); MODULE_ALIAS("ip6t_icmp6"); /* Returns 1 if the port is matched by the range, 0 otherwise */ static inline bool port_match(u_int16_t min, u_int16_t max, u_int16_t port, bool invert) { return (port >= min && port <= max) ^ invert; } static bool tcp_find_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, unsigned int optlen, bool invert, bool *hotdrop) { /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ const u_int8_t *op; u_int8_t _opt[60 - sizeof(struct tcphdr)]; unsigned int i; pr_debug("finding option\n"); if (!optlen) return invert; /* If we don't have the whole header, drop packet. */ op = skb_header_pointer(skb, protoff + sizeof(struct tcphdr), optlen, _opt); if (op == NULL) { *hotdrop = true; return false; } for (i = 0; i < optlen; ) { if (op[i] == option) return !invert; if (op[i] < 2) i++; else i += op[i+1]?:1; } return invert; } static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct tcphdr *th; struct tcphdr _tcph; const struct xt_tcp *tcpinfo = par->matchinfo; if (par->fragoff != 0) { /* To quote Alan: Don't allow a fragment of TCP 8 bytes in. Nobody normal causes this. Its a cracker trying to break in by doing a flag overwrite to pass the direction checks. */ if (par->fragoff == 1) { pr_debug("Dropping evil TCP offset=1 frag.\n"); par->hotdrop = true; } /* Must not be a fragment. */ return false; } th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. 
*/ pr_debug("Dropping evil TCP offset=0 tinygram.\n"); par->hotdrop = true; return false; } if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1], ntohs(th->source), !!(tcpinfo->invflags & XT_TCP_INV_SRCPT))) return false; if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1], ntohs(th->dest), !!(tcpinfo->invflags & XT_TCP_INV_DSTPT))) return false; if (!NF_INVF(tcpinfo, XT_TCP_INV_FLAGS, (((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp)) return false; if (tcpinfo->option) { if (th->doff * 4 < sizeof(_tcph)) { par->hotdrop = true; return false; } if (!tcp_find_option(tcpinfo->option, skb, par->thoff, th->doff*4 - sizeof(_tcph), tcpinfo->invflags & XT_TCP_INV_OPTION, &par->hotdrop)) return false; } return true; } static int tcp_mt_check(const struct xt_mtchk_param *par) { const struct xt_tcp *tcpinfo = par->matchinfo; /* Must specify no unknown invflags */ return (tcpinfo->invflags & ~XT_TCP_INV_MASK) ? -EINVAL : 0; } static bool udp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct udphdr *uh; struct udphdr _udph; const struct xt_udp *udpinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; uh = skb_header_pointer(skb, par->thoff, sizeof(_udph), &_udph); if (uh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ pr_debug("Dropping evil UDP tinygram.\n"); par->hotdrop = true; return false; } return port_match(udpinfo->spts[0], udpinfo->spts[1], ntohs(uh->source), !!(udpinfo->invflags & XT_UDP_INV_SRCPT)) && port_match(udpinfo->dpts[0], udpinfo->dpts[1], ntohs(uh->dest), !!(udpinfo->invflags & XT_UDP_INV_DSTPT)); } static int udp_mt_check(const struct xt_mtchk_param *par) { const struct xt_udp *udpinfo = par->matchinfo; /* Must specify no unknown invflags */ return (udpinfo->invflags & ~XT_UDP_INV_MASK) ? -EINVAL : 0; } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ static bool type_code_in_range(u8 test_type, u8 min_code, u8 max_code, u8 type, u8 code) { return type == test_type && code >= min_code && code <= max_code; } static bool icmp_type_code_match(u8 test_type, u8 min_code, u8 max_code, u8 type, u8 code, bool invert) { return (test_type == 0xFF || type_code_in_range(test_type, min_code, max_code, type, code)) ^ invert; } static bool icmp6_type_code_match(u8 test_type, u8 min_code, u8 max_code, u8 type, u8 code, bool invert) { return type_code_in_range(test_type, min_code, max_code, type, code) ^ invert; } static bool icmp_match(const struct sk_buff *skb, struct xt_action_param *par) { const struct icmphdr *ic; struct icmphdr _icmph; const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (!ic) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ par->hotdrop = true; return false; } return icmp_type_code_match(icmpinfo->type, icmpinfo->code[0], icmpinfo->code[1], ic->type, ic->code, !!(icmpinfo->invflags & IPT_ICMP_INV)); } static bool icmp6_match(const struct sk_buff *skb, struct xt_action_param *par) { const struct icmp6hdr *ic; struct icmp6hdr _icmph; const struct ip6t_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (!ic) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. 
*/ par->hotdrop = true; return false; } return icmp6_type_code_match(icmpinfo->type, icmpinfo->code[0], icmpinfo->code[1], ic->icmp6_type, ic->icmp6_code, !!(icmpinfo->invflags & IP6T_ICMP_INV)); } static int icmp_checkentry(const struct xt_mtchk_param *par) { const struct ipt_icmp *icmpinfo = par->matchinfo; return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0; } static int icmp6_checkentry(const struct xt_mtchk_param *par) { const struct ip6t_icmp *icmpinfo = par->matchinfo; return (icmpinfo->invflags & ~IP6T_ICMP_INV) ? -EINVAL : 0; } static struct xt_match tcpudp_mt_reg[] __read_mostly = { { .name = "tcp", .family = NFPROTO_IPV4, .checkentry = tcp_mt_check, .match = tcp_mt, .matchsize = sizeof(struct xt_tcp), .proto = IPPROTO_TCP, .me = THIS_MODULE, }, { .name = "tcp", .family = NFPROTO_IPV6, .checkentry = tcp_mt_check, .match = tcp_mt, .matchsize = sizeof(struct xt_tcp), .proto = IPPROTO_TCP, .me = THIS_MODULE, }, { .name = "udp", .family = NFPROTO_IPV4, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), .proto = IPPROTO_UDP, .me = THIS_MODULE, }, { .name = "udp", .family = NFPROTO_IPV6, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), .proto = IPPROTO_UDP, .me = THIS_MODULE, }, { .name = "udplite", .family = NFPROTO_IPV4, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), .proto = IPPROTO_UDPLITE, .me = THIS_MODULE, }, { .name = "udplite", .family = NFPROTO_IPV6, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), .proto = IPPROTO_UDPLITE, .me = THIS_MODULE, }, { .name = "icmp", .match = icmp_match, .matchsize = sizeof(struct ipt_icmp), .checkentry = icmp_checkentry, .proto = IPPROTO_ICMP, .family = NFPROTO_IPV4, .me = THIS_MODULE, }, { .name = "icmp6", .match = icmp6_match, .matchsize = sizeof(struct ip6t_icmp), .checkentry = icmp6_checkentry, .proto = IPPROTO_ICMPV6, .family = NFPROTO_IPV6, .me = THIS_MODULE, }, }; static int __init tcpudp_mt_init(void) { return xt_register_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg)); } static void __exit tcpudp_mt_exit(void) { xt_unregister_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg)); } module_init(tcpudp_mt_init); module_exit(tcpudp_mt_exit); |
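The TCP and UDP port comparisons above all reduce to port_match(), which XORs the in-range test with the user-supplied invert flag. A standalone sketch (not part of the module) making those semantics concrete for a normal and an inverted port range:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same range-with-inversion test that the module applies to src/dst ports. */
static bool port_match(uint16_t min, uint16_t max, uint16_t port, bool invert)
{
	return (port >= min && port <= max) ^ invert;
}

int main(void)
{
	/* equivalent of --dport 1000:2000 */
	printf("%d\n", port_match(1000, 2000, 1500, false));	/* 1: inside range */
	printf("%d\n", port_match(1000, 2000, 80,   false));	/* 0: outside range */
	/* equivalent of ! --dport 1000:2000 */
	printf("%d\n", port_match(1000, 2000, 80,   true));	/* 1: inverted match */
	return 0;
}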
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds * * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines * which can be dynamically activated and de-activated by the line * discipline handling modules (like SLIP). */ #include <linux/bits.h> #include <linux/types.h> #include <linux/termios.h> #include <linux/errno.h> #include <linux/sched/signal.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/tty.h> #include <linux/fcntl.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/mutex.h> #include <linux/compat.h> #include <linux/termios_internal.h> #include "tty.h" #include <asm/io.h> #include <linux/uaccess.h> #undef DEBUG /* * Internal flag options for termios setting behavior */ #define TERMIOS_FLUSH BIT(0) #define TERMIOS_WAIT BIT(1) #define TERMIOS_TERMIO BIT(2) #define TERMIOS_OLD BIT(3) /** * tty_chars_in_buffer - characters pending * @tty: terminal * * Returns: the number of bytes of data in the device private output queue. If * no private method is supplied there is assumed to be no queue on the device. */ unsigned int tty_chars_in_buffer(struct tty_struct *tty) { if (tty->ops->chars_in_buffer) return tty->ops->chars_in_buffer(tty); return 0; } EXPORT_SYMBOL(tty_chars_in_buffer); /** * tty_write_room - write queue space * @tty: terminal * * Returns: the number of bytes that can be queued to this device at the present * time. The result should be treated as a guarantee and the driver cannot * offer a value it later shrinks by more than the number of bytes written. If * no method is provided, 2K is always returned and data may be lost as there * will be no flow control. */ unsigned int tty_write_room(struct tty_struct *tty) { if (tty->ops->write_room) return tty->ops->write_room(tty); return 2048; } EXPORT_SYMBOL(tty_write_room); /** * tty_driver_flush_buffer - discard internal buffer * @tty: terminal * * Discard the internal output buffer for this device. If no method is provided, * then either the buffer cannot be hardware flushed or there is no buffer * driver side. */ void tty_driver_flush_buffer(struct tty_struct *tty) { if (tty->ops->flush_buffer) tty->ops->flush_buffer(tty); } EXPORT_SYMBOL(tty_driver_flush_buffer); /** * tty_unthrottle - flow control * @tty: terminal * * Indicate that a @tty may continue transmitting data down the stack. Takes * the &tty_struct->termios_rwsem to protect against parallel * throttle/unthrottle and also to ensure the driver can consistently reference * its own termios data at this point when implementing software flow control. * * Drivers should however remember that the stack can issue a throttle, then * change flow control method, then unthrottle.
*/ void tty_unthrottle(struct tty_struct *tty) { down_write(&tty->termios_rwsem); if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) && tty->ops->unthrottle) tty->ops->unthrottle(tty); tty->flow_change = TTY_FLOW_NO_CHANGE; up_write(&tty->termios_rwsem); } EXPORT_SYMBOL(tty_unthrottle); /** * tty_throttle_safe - flow control * @tty: terminal * * Indicate that a @tty should stop transmitting data down the stack. * tty_throttle_safe() will only attempt throttle if @tty->flow_change is * %TTY_THROTTLE_SAFE. Prevents an accidental throttle due to race conditions * when throttling is conditional on factors evaluated prior to throttling. * * Returns: %true if @tty is throttled (or was already throttled) */ bool tty_throttle_safe(struct tty_struct *tty) { guard(mutex)(&tty->throttle_mutex); if (tty_throttled(tty)) return true; if (tty->flow_change != TTY_THROTTLE_SAFE) return false; set_bit(TTY_THROTTLED, &tty->flags); if (tty->ops->throttle) tty->ops->throttle(tty); return true; } /** * tty_unthrottle_safe - flow control * @tty: terminal * * Similar to tty_unthrottle() but will only attempt unthrottle if * @tty->flow_change is %TTY_UNTHROTTLE_SAFE. Prevents an accidental unthrottle * due to race conditions when unthrottling is conditional on factors evaluated * prior to unthrottling. * * Returns: %true if @tty is unthrottled (or was already unthrottled) */ bool tty_unthrottle_safe(struct tty_struct *tty) { guard(mutex)(&tty->throttle_mutex); if (!tty_throttled(tty)) return true; if (tty->flow_change != TTY_UNTHROTTLE_SAFE) return false; clear_bit(TTY_THROTTLED, &tty->flags); if (tty->ops->unthrottle) tty->ops->unthrottle(tty); return true; } /** * tty_wait_until_sent - wait for I/O to finish * @tty: tty we are waiting for * @timeout: how long we will wait * * Wait for characters pending in a tty driver to hit the wire, or for a * timeout to occur (eg due to flow control). * * Locking: none */ void tty_wait_until_sent(struct tty_struct *tty, long timeout) { if (!timeout) timeout = MAX_SCHEDULE_TIMEOUT; timeout = wait_event_interruptible_timeout(tty->write_wait, !tty_chars_in_buffer(tty), timeout); if (timeout <= 0) return; if (timeout == MAX_SCHEDULE_TIMEOUT) timeout = 0; if (tty->ops->wait_until_sent) tty->ops->wait_until_sent(tty, timeout); } EXPORT_SYMBOL(tty_wait_until_sent); /* * Termios Helper Methods */ static void unset_locked_termios(struct tty_struct *tty, const struct ktermios *old) { struct ktermios *termios = &tty->termios; struct ktermios *locked = &tty->termios_locked; int i; #define NOSET_MASK(x, y, z) (x = ((x) & ~(z)) | ((y) & (z))) NOSET_MASK(termios->c_iflag, old->c_iflag, locked->c_iflag); NOSET_MASK(termios->c_oflag, old->c_oflag, locked->c_oflag); NOSET_MASK(termios->c_cflag, old->c_cflag, locked->c_cflag); NOSET_MASK(termios->c_lflag, old->c_lflag, locked->c_lflag); termios->c_line = locked->c_line ? old->c_line : termios->c_line; for (i = 0; i < NCCS; i++) termios->c_cc[i] = locked->c_cc[i] ? old->c_cc[i] : termios->c_cc[i]; /* FIXME: What should we do for i/ospeed */ } /** * tty_termios_copy_hw - copy hardware settings * @new: new termios * @old: old termios * * Propagate the hardware specific terminal setting bits from the @old termios * structure to the @new one. This is used in cases where the hardware does not * support reconfiguration or as a helper in some cases where only minimal * reconfiguration is supported. */ void tty_termios_copy_hw(struct ktermios *new, const struct ktermios *old) { /* The bits a dumb device handles in software. 
Smart devices need to always provide a set_termios method */ new->c_cflag &= HUPCL | CREAD | CLOCAL; new->c_cflag |= old->c_cflag & ~(HUPCL | CREAD | CLOCAL); new->c_ispeed = old->c_ispeed; new->c_ospeed = old->c_ospeed; } EXPORT_SYMBOL(tty_termios_copy_hw); /** * tty_termios_hw_change - check for setting change * @a: termios * @b: termios to compare * * Check if any of the bits that affect a dumb device have changed between the * two termios structures, or a speed change is needed. * * Returns: %true if change is needed */ bool tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b) { if (a->c_ispeed != b->c_ispeed || a->c_ospeed != b->c_ospeed) return true; if ((a->c_cflag ^ b->c_cflag) & ~(HUPCL | CREAD | CLOCAL)) return true; return false; } EXPORT_SYMBOL(tty_termios_hw_change); /** * tty_get_char_size - get size of a character * @cflag: termios cflag value * * Returns: size (in bits) of a character depending on @cflag's %CSIZE setting */ unsigned char tty_get_char_size(unsigned int cflag) { switch (cflag & CSIZE) { case CS5: return 5; case CS6: return 6; case CS7: return 7; case CS8: default: return 8; } } EXPORT_SYMBOL_GPL(tty_get_char_size); /** * tty_get_frame_size - get size of a frame * @cflag: termios cflag value * * Get the size (in bits) of a frame depending on @cflag's %CSIZE, %CSTOPB, and * %PARENB setting. The result is a sum of character size, start and stop bits * -- one bit each -- second stop bit (if set), and parity bit (if set). * * Returns: size (in bits) of a frame depending on @cflag's setting. */ unsigned char tty_get_frame_size(unsigned int cflag) { unsigned char bits = 2 + tty_get_char_size(cflag); if (cflag & CSTOPB) bits++; if (cflag & PARENB) bits++; if (cflag & ADDRB) bits++; return bits; } EXPORT_SYMBOL_GPL(tty_get_frame_size); /** * tty_set_termios - update termios values * @tty: tty to update * @new_termios: desired new value * * Perform updates to the termios values set on this @tty. A master pty's * termios should never be set. * * Locking: &tty_struct->termios_rwsem */ int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) { struct ktermios old_termios; struct tty_ldisc *ld; WARN_ON(tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER); /* * Perform the actual termios internal changes under lock. */ /* FIXME: we need to decide on some locking/ordering semantics for the set_termios notification eventually */ down_write(&tty->termios_rwsem); old_termios = tty->termios; tty->termios = *new_termios; unset_locked_termios(tty, &old_termios); /* Reset any ADDRB changes, ADDRB is changed through ->rs485_config() */ tty->termios.c_cflag ^= (tty->termios.c_cflag ^ old_termios.c_cflag) & ADDRB; if (tty->ops->set_termios) tty->ops->set_termios(tty, &old_termios); else tty_termios_copy_hw(&tty->termios, &old_termios); ld = tty_ldisc_ref(tty); if (ld != NULL) { if (ld->ops->set_termios) ld->ops->set_termios(tty, &old_termios); tty_ldisc_deref(ld); } up_write(&tty->termios_rwsem); return 0; } EXPORT_SYMBOL_GPL(tty_set_termios); /* * Translate a "termio" structure into a "termios". Ugh. 
*/ __weak int user_termio_to_kernel_termios(struct ktermios *termios, struct termio __user *termio) { struct termio v; if (copy_from_user(&v, termio, sizeof(struct termio))) return -EFAULT; termios->c_iflag = (0xffff0000 & termios->c_iflag) | v.c_iflag; termios->c_oflag = (0xffff0000 & termios->c_oflag) | v.c_oflag; termios->c_cflag = (0xffff0000 & termios->c_cflag) | v.c_cflag; termios->c_lflag = (0xffff0000 & termios->c_lflag) | v.c_lflag; termios->c_line = (0xffff0000 & termios->c_lflag) | v.c_line; memcpy(termios->c_cc, v.c_cc, NCC); return 0; } /* * Translate a "termios" structure into a "termio". Ugh. */ __weak int kernel_termios_to_user_termio(struct termio __user *termio, struct ktermios *termios) { struct termio v; memset(&v, 0, sizeof(struct termio)); v.c_iflag = termios->c_iflag; v.c_oflag = termios->c_oflag; v.c_cflag = termios->c_cflag; v.c_lflag = termios->c_lflag; v.c_line = termios->c_line; memcpy(v.c_cc, termios->c_cc, NCC); return copy_to_user(termio, &v, sizeof(struct termio)); } #ifdef TCGETS2 __weak int user_termios_to_kernel_termios(struct ktermios *k, struct termios2 __user *u) { return copy_from_user(k, u, sizeof(struct termios2)); } __weak int kernel_termios_to_user_termios(struct termios2 __user *u, struct ktermios *k) { return copy_to_user(u, k, sizeof(struct termios2)); } __weak int user_termios_to_kernel_termios_1(struct ktermios *k, struct termios __user *u) { return copy_from_user(k, u, sizeof(struct termios)); } __weak int kernel_termios_to_user_termios_1(struct termios __user *u, struct ktermios *k) { return copy_to_user(u, k, sizeof(struct termios)); } #else __weak int user_termios_to_kernel_termios(struct ktermios *k, struct termios __user *u) { return copy_from_user(k, u, sizeof(struct termios)); } __weak int kernel_termios_to_user_termios(struct termios __user *u, struct ktermios *k) { return copy_to_user(u, k, sizeof(struct termios)); } #endif /* TCGETS2 */ /** * set_termios - set termios values for a tty * @tty: terminal device * @arg: user data * @opt: option information * * Helper function to prepare termios data and run necessary other functions * before using tty_set_termios() to do the actual changes. 
* * Locking: called functions take &tty_struct->ldisc_sem and * &tty_struct->termios_rwsem locks * * Returns: 0 on success, an error otherwise */ static int set_termios(struct tty_struct *tty, void __user *arg, int opt) { struct ktermios tmp_termios; struct tty_ldisc *ld; int retval = tty_check_change(tty); if (retval) return retval; down_read(&tty->termios_rwsem); tmp_termios = tty->termios; up_read(&tty->termios_rwsem); if (opt & TERMIOS_TERMIO) { if (user_termio_to_kernel_termios(&tmp_termios, (struct termio __user *)arg)) return -EFAULT; #ifdef TCGETS2 } else if (opt & TERMIOS_OLD) { if (user_termios_to_kernel_termios_1(&tmp_termios, (struct termios __user *)arg)) return -EFAULT; } else { if (user_termios_to_kernel_termios(&tmp_termios, (struct termios2 __user *)arg)) return -EFAULT; } #else } else if (user_termios_to_kernel_termios(&tmp_termios, (struct termios __user *)arg)) return -EFAULT; #endif /* If old style Bfoo values are used then load c_ispeed/c_ospeed * with the real speed so its unconditionally usable */ tmp_termios.c_ispeed = tty_termios_input_baud_rate(&tmp_termios); tmp_termios.c_ospeed = tty_termios_baud_rate(&tmp_termios); if (opt & (TERMIOS_FLUSH|TERMIOS_WAIT)) { retry_write_wait: retval = wait_event_interruptible(tty->write_wait, !tty_chars_in_buffer(tty)); if (retval < 0) return retval; if (tty_write_lock(tty, false) < 0) goto retry_write_wait; /* Racing writer? */ if (tty_chars_in_buffer(tty)) { tty_write_unlock(tty); goto retry_write_wait; } ld = tty_ldisc_ref(tty); if (ld != NULL) { if ((opt & TERMIOS_FLUSH) && ld->ops->flush_buffer) ld->ops->flush_buffer(tty); tty_ldisc_deref(ld); } if ((opt & TERMIOS_WAIT) && tty->ops->wait_until_sent) { tty->ops->wait_until_sent(tty, 0); if (signal_pending(current)) { tty_write_unlock(tty); return -ERESTARTSYS; } } tty_set_termios(tty, &tmp_termios); tty_write_unlock(tty); } else { tty_set_termios(tty, &tmp_termios); } /* FIXME: Arguably if tmp_termios == tty->termios AND the actual requested termios was not tmp_termios then we may want to return an error as no user requested change has succeeded */ return 0; } static void copy_termios(struct tty_struct *tty, struct ktermios *kterm) { down_read(&tty->termios_rwsem); *kterm = tty->termios; up_read(&tty->termios_rwsem); } static void copy_termios_locked(struct tty_struct *tty, struct ktermios *kterm) { down_read(&tty->termios_rwsem); *kterm = tty->termios_locked; up_read(&tty->termios_rwsem); } static int get_termio(struct tty_struct *tty, struct termio __user *termio) { struct ktermios kterm; copy_termios(tty, &kterm); if (kernel_termios_to_user_termio(termio, &kterm)) return -EFAULT; return 0; } #ifdef TIOCGETP /* * These are deprecated, but there is limited support.. * * The "sg_flags" translation is a joke.. */ static int get_sgflags(struct tty_struct *tty) { int flags = 0; if (!L_ICANON(tty)) { if (L_ISIG(tty)) flags |= 0x02; /* cbreak */ else flags |= 0x20; /* raw */ } if (L_ECHO(tty)) flags |= 0x08; /* echo */ if (O_OPOST(tty)) if (O_ONLCR(tty)) flags |= 0x10; /* crmod */ return flags; } static int get_sgttyb(struct tty_struct *tty, struct sgttyb __user *sgttyb) { struct sgttyb tmp; down_read(&tty->termios_rwsem); tmp.sg_ispeed = tty->termios.c_ispeed; tmp.sg_ospeed = tty->termios.c_ospeed; tmp.sg_erase = tty->termios.c_cc[VERASE]; tmp.sg_kill = tty->termios.c_cc[VKILL]; tmp.sg_flags = get_sgflags(tty); up_read(&tty->termios_rwsem); return copy_to_user(sgttyb, &tmp, sizeof(tmp)) ? 
-EFAULT : 0; } static void set_sgflags(struct ktermios *termios, int flags) { termios->c_iflag = ICRNL | IXON; termios->c_oflag = 0; termios->c_lflag = ISIG | ICANON; if (flags & 0x02) { /* cbreak */ termios->c_iflag = 0; termios->c_lflag &= ~ICANON; } if (flags & 0x08) { /* echo */ termios->c_lflag |= ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN; } if (flags & 0x10) { /* crmod */ termios->c_oflag |= OPOST | ONLCR; } if (flags & 0x20) { /* raw */ termios->c_iflag = 0; termios->c_lflag &= ~(ISIG | ICANON); } if (!(termios->c_lflag & ICANON)) { termios->c_cc[VMIN] = 1; termios->c_cc[VTIME] = 0; } } /** * set_sgttyb - set legacy terminal values * @tty: tty structure * @sgttyb: pointer to old style terminal structure * * Updates a terminal from the legacy BSD style terminal information structure. * * Locking: &tty_struct->termios_rwsem * * Returns: 0 on success, an error otherwise */ static int set_sgttyb(struct tty_struct *tty, struct sgttyb __user *sgttyb) { int retval; struct sgttyb tmp; struct ktermios termios; retval = tty_check_change(tty); if (retval) return retval; if (copy_from_user(&tmp, sgttyb, sizeof(tmp))) return -EFAULT; down_write(&tty->termios_rwsem); termios = tty->termios; termios.c_cc[VERASE] = tmp.sg_erase; termios.c_cc[VKILL] = tmp.sg_kill; set_sgflags(&termios, tmp.sg_flags); /* Try and encode into Bfoo format */ tty_termios_encode_baud_rate(&termios, termios.c_ispeed, termios.c_ospeed); up_write(&tty->termios_rwsem); tty_set_termios(tty, &termios); return 0; } #endif #ifdef TIOCGETC static int get_tchars(struct tty_struct *tty, struct tchars __user *tchars) { struct tchars tmp; down_read(&tty->termios_rwsem); tmp.t_intrc = tty->termios.c_cc[VINTR]; tmp.t_quitc = tty->termios.c_cc[VQUIT]; tmp.t_startc = tty->termios.c_cc[VSTART]; tmp.t_stopc = tty->termios.c_cc[VSTOP]; tmp.t_eofc = tty->termios.c_cc[VEOF]; tmp.t_brkc = tty->termios.c_cc[VEOL2]; /* what is brkc anyway? */ up_read(&tty->termios_rwsem); return copy_to_user(tchars, &tmp, sizeof(tmp)) ? -EFAULT : 0; } static int set_tchars(struct tty_struct *tty, struct tchars __user *tchars) { struct tchars tmp; if (copy_from_user(&tmp, tchars, sizeof(tmp))) return -EFAULT; down_write(&tty->termios_rwsem); tty->termios.c_cc[VINTR] = tmp.t_intrc; tty->termios.c_cc[VQUIT] = tmp.t_quitc; tty->termios.c_cc[VSTART] = tmp.t_startc; tty->termios.c_cc[VSTOP] = tmp.t_stopc; tty->termios.c_cc[VEOF] = tmp.t_eofc; tty->termios.c_cc[VEOL2] = tmp.t_brkc; /* what is brkc anyway? */ up_write(&tty->termios_rwsem); return 0; } #endif #ifdef TIOCGLTC static int get_ltchars(struct tty_struct *tty, struct ltchars __user *ltchars) { struct ltchars tmp; down_read(&tty->termios_rwsem); tmp.t_suspc = tty->termios.c_cc[VSUSP]; /* what is dsuspc anyway? */ tmp.t_dsuspc = tty->termios.c_cc[VSUSP]; tmp.t_rprntc = tty->termios.c_cc[VREPRINT]; /* what is flushc anyway? */ tmp.t_flushc = tty->termios.c_cc[VEOL2]; tmp.t_werasc = tty->termios.c_cc[VWERASE]; tmp.t_lnextc = tty->termios.c_cc[VLNEXT]; up_read(&tty->termios_rwsem); return copy_to_user(ltchars, &tmp, sizeof(tmp)) ? -EFAULT : 0; } static int set_ltchars(struct tty_struct *tty, struct ltchars __user *ltchars) { struct ltchars tmp; if (copy_from_user(&tmp, ltchars, sizeof(tmp))) return -EFAULT; down_write(&tty->termios_rwsem); tty->termios.c_cc[VSUSP] = tmp.t_suspc; /* what is dsuspc anyway? */ tty->termios.c_cc[VEOL2] = tmp.t_dsuspc; tty->termios.c_cc[VREPRINT] = tmp.t_rprntc; /* what is flushc anyway? 
*/ tty->termios.c_cc[VEOL2] = tmp.t_flushc; tty->termios.c_cc[VWERASE] = tmp.t_werasc; tty->termios.c_cc[VLNEXT] = tmp.t_lnextc; up_write(&tty->termios_rwsem); return 0; } #endif /** * tty_change_softcar - carrier change ioctl helper * @tty: tty to update * @enable: enable/disable %CLOCAL * * Perform a change to the %CLOCAL state and call into the driver layer to make * it visible. * * Locking: &tty_struct->termios_rwsem. * * Returns: 0 on success, an error otherwise */ static int tty_change_softcar(struct tty_struct *tty, bool enable) { int ret = 0; struct ktermios old; tcflag_t bit = enable ? CLOCAL : 0; down_write(&tty->termios_rwsem); old = tty->termios; tty->termios.c_cflag &= ~CLOCAL; tty->termios.c_cflag |= bit; if (tty->ops->set_termios) tty->ops->set_termios(tty, &old); if (C_CLOCAL(tty) != bit) ret = -EINVAL; up_write(&tty->termios_rwsem); return ret; } /** * tty_mode_ioctl - mode related ioctls * @tty: tty for the ioctl * @cmd: command * @arg: ioctl argument * * Perform non-line discipline specific mode control ioctls. This is designed * to be called by line disciplines to ensure they provide consistent mode * setting. */ int tty_mode_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct tty_struct *real_tty; void __user *p = (void __user *)arg; int ret = 0; struct ktermios kterm; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) real_tty = tty->link; else real_tty = tty; switch (cmd) { #ifdef TIOCGETP case TIOCGETP: return get_sgttyb(real_tty, (struct sgttyb __user *) arg); case TIOCSETP: case TIOCSETN: return set_sgttyb(real_tty, (struct sgttyb __user *) arg); #endif #ifdef TIOCGETC case TIOCGETC: return get_tchars(real_tty, p); case TIOCSETC: return set_tchars(real_tty, p); #endif #ifdef TIOCGLTC case TIOCGLTC: return get_ltchars(real_tty, p); case TIOCSLTC: return set_ltchars(real_tty, p); #endif case TCSETSF: return set_termios(real_tty, p, TERMIOS_FLUSH | TERMIOS_WAIT | TERMIOS_OLD); case TCSETSW: return set_termios(real_tty, p, TERMIOS_WAIT | TERMIOS_OLD); case TCSETS: return set_termios(real_tty, p, TERMIOS_OLD); #ifndef TCGETS2 case TCGETS: copy_termios(real_tty, &kterm); if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm)) ret = -EFAULT; return ret; #else case TCGETS: copy_termios(real_tty, &kterm); if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm)) ret = -EFAULT; return ret; case TCGETS2: copy_termios(real_tty, &kterm); if (kernel_termios_to_user_termios((struct termios2 __user *)arg, &kterm)) ret = -EFAULT; return ret; case TCSETSF2: return set_termios(real_tty, p, TERMIOS_FLUSH | TERMIOS_WAIT); case TCSETSW2: return set_termios(real_tty, p, TERMIOS_WAIT); case TCSETS2: return set_termios(real_tty, p, 0); #endif case TCGETA: return get_termio(real_tty, p); case TCSETAF: return set_termios(real_tty, p, TERMIOS_FLUSH | TERMIOS_WAIT | TERMIOS_TERMIO); case TCSETAW: return set_termios(real_tty, p, TERMIOS_WAIT | TERMIOS_TERMIO); case TCSETA: return set_termios(real_tty, p, TERMIOS_TERMIO); #ifndef TCGETS2 case TIOCGLCKTRMIOS: copy_termios_locked(real_tty, &kterm); if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm)) ret = -EFAULT; return ret; case TIOCSLCKTRMIOS: if (!checkpoint_restore_ns_capable(&init_user_ns)) return -EPERM; copy_termios_locked(real_tty, &kterm); if (user_termios_to_kernel_termios(&kterm, (struct termios __user *) arg)) return -EFAULT; down_write(&real_tty->termios_rwsem); real_tty->termios_locked = kterm; 
up_write(&real_tty->termios_rwsem); return 0; #else case TIOCGLCKTRMIOS: copy_termios_locked(real_tty, &kterm); if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm)) ret = -EFAULT; return ret; case TIOCSLCKTRMIOS: if (!checkpoint_restore_ns_capable(&init_user_ns)) return -EPERM; copy_termios_locked(real_tty, &kterm); if (user_termios_to_kernel_termios_1(&kterm, (struct termios __user *) arg)) return -EFAULT; down_write(&real_tty->termios_rwsem); real_tty->termios_locked = kterm; up_write(&real_tty->termios_rwsem); return ret; #endif #ifdef TCGETX case TCGETX: case TCSETX: case TCSETXW: case TCSETXF: return -ENOTTY; #endif case TIOCGSOFTCAR: copy_termios(real_tty, &kterm); ret = put_user((kterm.c_cflag & CLOCAL) ? 1 : 0, (int __user *)arg); return ret; case TIOCSSOFTCAR: if (get_user(arg, (unsigned int __user *) arg)) return -EFAULT; return tty_change_softcar(real_tty, arg); default: return -ENOIOCTLCMD; } } EXPORT_SYMBOL_GPL(tty_mode_ioctl); /* Caller guarantees ldisc reference is held */ static int __tty_perform_flush(struct tty_struct *tty, unsigned long arg) { struct tty_ldisc *ld = tty->ldisc; switch (arg) { case TCIFLUSH: if (ld && ld->ops->flush_buffer) { ld->ops->flush_buffer(tty); tty_unthrottle(tty); } break; case TCIOFLUSH: if (ld && ld->ops->flush_buffer) { ld->ops->flush_buffer(tty); tty_unthrottle(tty); } fallthrough; case TCOFLUSH: tty_driver_flush_buffer(tty); break; default: return -EINVAL; } return 0; } int tty_perform_flush(struct tty_struct *tty, unsigned long arg) { struct tty_ldisc *ld; int retval = tty_check_change(tty); if (retval) return retval; ld = tty_ldisc_ref_wait(tty); retval = __tty_perform_flush(tty, arg); if (ld) tty_ldisc_deref(ld); return retval; } EXPORT_SYMBOL_GPL(tty_perform_flush); int n_tty_ioctl_helper(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { int retval; switch (cmd) { case TCXONC: retval = tty_check_change(tty); if (retval) return retval; switch (arg) { case TCOOFF: spin_lock_irq(&tty->flow.lock); if (!tty->flow.tco_stopped) { tty->flow.tco_stopped = true; __stop_tty(tty); } spin_unlock_irq(&tty->flow.lock); break; case TCOON: spin_lock_irq(&tty->flow.lock); if (tty->flow.tco_stopped) { tty->flow.tco_stopped = false; __start_tty(tty); } spin_unlock_irq(&tty->flow.lock); break; case TCIOFF: if (STOP_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, STOP_CHAR(tty)); break; case TCION: if (START_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, START_CHAR(tty)); break; default: return -EINVAL; } return retval; case TCFLSH: retval = tty_check_change(tty); if (retval) return retval; return __tty_perform_flush(tty, arg); default: /* Try the mode commands */ return tty_mode_ioctl(tty, cmd, arg); } } EXPORT_SYMBOL(n_tty_ioctl_helper); |
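/*
 * Editor's illustration (not part of the kernel sources above): the user-space
 * side of the TCGETS/TCSETSW paths handled by tty_mode_ioctl() and
 * set_termios(). glibc's tcgetattr()/tcsetattr() issue these ioctls; TCSADRAIN
 * typically maps to TCSETSW (the TERMIOS_WAIT variant) and TCSAFLUSH to
 * TCSETSF (TERMIOS_FLUSH | TERMIOS_WAIT). Error handling is kept minimal.
 */
#include <termios.h>
#include <unistd.h>

int main(void)
{
        struct termios tio;

        if (tcgetattr(STDIN_FILENO, &tio))              /* TCGETS */
                return 1;
        tio.c_lflag &= ~(ICANON | ECHO);                /* non-canonical, no echo */
        tio.c_cc[VMIN] = 1;
        tio.c_cc[VTIME] = 0;
        /* wait for pending output to drain, then apply the new settings */
        if (tcsetattr(STDIN_FILENO, TCSADRAIN, &tio))   /* TCSETSW */
                return 1;
        return 0;
}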
// SPDX-License-Identifier: GPL-2.0+ /* * Berkshire USB-PC Watchdog Card Driver * * (c) Copyright 2004-2007 Wim Van Sebroeck <wim@iguana.be>. * * Based on source code of the following authors: * Ken Hollis <kenji@bitgate.com>, * Alan Cox <alan@lxorguk.ukuu.org.uk>, * Matt Domsch <Matt_Domsch@dell.com>, * Rob Radez <rob@osinvestor.com>, * Greg Kroah-Hartman <greg@kroah.com> * * Neither Wim Van Sebroeck nor Iguana vzw.
admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * * Thanks also to Simon Machell at Berkshire Products Inc. for * providing the test hardware. More info is available at * http://www.berkprod.com/ or http://www.pcwatchdog.com/ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> /* For module specific items */ #include <linux/moduleparam.h> /* For new moduleparam's */ #include <linux/types.h> /* For standard types (like size_t) */ #include <linux/errno.h> /* For the -ENODEV/... values */ #include <linux/kernel.h> /* For printk/panic/... */ #include <linux/delay.h> /* For mdelay function */ #include <linux/miscdevice.h> /* For struct miscdevice */ #include <linux/watchdog.h> /* For the watchdog specific items */ #include <linux/notifier.h> /* For notifier support */ #include <linux/reboot.h> /* For reboot_notifier stuff */ #include <linux/init.h> /* For __init/__exit/... */ #include <linux/fs.h> /* For file operations */ #include <linux/usb.h> /* For USB functions */ #include <linux/slab.h> /* For kmalloc, ... */ #include <linux/mutex.h> /* For mutex locking */ #include <linux/hid.h> /* For HID_REQ_SET_REPORT & HID_DT_REPORT */ #include <linux/uaccess.h> /* For copy_to_user/put_user/... */ /* Module and Version Information */ #define DRIVER_VERSION "1.02" #define DRIVER_AUTHOR "Wim Van Sebroeck <wim@iguana.be>" #define DRIVER_DESC "Berkshire USB-PC Watchdog driver" #define DRIVER_NAME "pcwd_usb" MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); #define WATCHDOG_HEARTBEAT 0 /* default heartbeat = delay-time from dip-switches */ static int heartbeat = WATCHDOG_HEARTBEAT; module_param(heartbeat, int, 0); MODULE_PARM_DESC(heartbeat, "Watchdog heartbeat in seconds. " "(0<heartbeat<65536 or 0=delay-time from dip-switches, default=" __MODULE_STRING(WATCHDOG_HEARTBEAT) ")"); static bool nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); /* The vendor and product id's for the USB-PC Watchdog card */ #define USB_PCWD_VENDOR_ID 0x0c98 #define USB_PCWD_PRODUCT_ID 0x1140 /* table of devices that work with this driver */ static const struct usb_device_id usb_pcwd_table[] = { { USB_DEVICE(USB_PCWD_VENDOR_ID, USB_PCWD_PRODUCT_ID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usb_pcwd_table); /* according to documentation max. 
time to process a command for the USB * watchdog card is 100 or 200 ms, so we give it 250 ms to do it's job */ #define USB_COMMAND_TIMEOUT 250 /* Watchdog's internal commands */ #define CMD_READ_TEMP 0x02 /* Read Temperature; Re-trigger Watchdog */ #define CMD_TRIGGER CMD_READ_TEMP #define CMD_GET_STATUS 0x04 /* Get Status Information */ #define CMD_GET_FIRMWARE_VERSION 0x08 /* Get Firmware Version */ #define CMD_GET_DIP_SWITCH_SETTINGS 0x0c /* Get Dip Switch Settings */ #define CMD_READ_WATCHDOG_TIMEOUT 0x18 /* Read Current Watchdog Time */ #define CMD_WRITE_WATCHDOG_TIMEOUT 0x19 /* Write Current WatchdogTime */ #define CMD_ENABLE_WATCHDOG 0x30 /* Enable / Disable Watchdog */ #define CMD_DISABLE_WATCHDOG CMD_ENABLE_WATCHDOG /* Watchdog's Dip Switch heartbeat values */ static const int heartbeat_tbl[] = { 5, /* OFF-OFF-OFF = 5 Sec */ 10, /* OFF-OFF-ON = 10 Sec */ 30, /* OFF-ON-OFF = 30 Sec */ 60, /* OFF-ON-ON = 1 Min */ 300, /* ON-OFF-OFF = 5 Min */ 600, /* ON-OFF-ON = 10 Min */ 1800, /* ON-ON-OFF = 30 Min */ 3600, /* ON-ON-ON = 1 hour */ }; /* We can only use 1 card due to the /dev/watchdog restriction */ static int cards_found; /* some internal variables */ static unsigned long is_active; static char expect_release; /* Structure to hold all of our device specific stuff */ struct usb_pcwd_private { /* save off the usb device pointer */ struct usb_device *udev; /* the interface for this device */ struct usb_interface *interface; /* the interface number used for cmd's */ unsigned int interface_number; /* the buffer to intr data */ unsigned char *intr_buffer; /* the dma address for the intr buffer */ dma_addr_t intr_dma; /* the size of the intr buffer */ size_t intr_size; /* the urb used for the intr pipe */ struct urb *intr_urb; /* The command that is reported back */ unsigned char cmd_command; /* The data MSB that is reported back */ unsigned char cmd_data_msb; /* The data LSB that is reported back */ unsigned char cmd_data_lsb; /* true if we received a report after a command */ atomic_t cmd_received; /* Wether or not the device exists */ int exists; /* locks this structure */ struct mutex mtx; }; static struct usb_pcwd_private *usb_pcwd_device; /* prevent races between open() and disconnect() */ static DEFINE_MUTEX(disconnect_mutex); /* local function prototypes */ static int usb_pcwd_probe(struct usb_interface *interface, const struct usb_device_id *id); static void usb_pcwd_disconnect(struct usb_interface *interface); /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver usb_pcwd_driver = { .name = DRIVER_NAME, .probe = usb_pcwd_probe, .disconnect = usb_pcwd_disconnect, .id_table = usb_pcwd_table, }; static void usb_pcwd_intr_done(struct urb *urb) { struct usb_pcwd_private *usb_pcwd = (struct usb_pcwd_private *)urb->context; unsigned char *data = usb_pcwd->intr_buffer; struct device *dev = &usb_pcwd->interface->dev; int retval; switch (urb->status) { case 0: /* success */ break; case -ECONNRESET: /* unlink */ case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d", __func__, urb->status); return; /* -EPIPE: should clear the halt */ default: /* error */ dev_dbg(dev, "%s - nonzero urb status received: %d", __func__, urb->status); goto resubmit; } dev_dbg(dev, "received following data cmd=0x%02x msb=0x%02x lsb=0x%02x", data[0], data[1], data[2]); usb_pcwd->cmd_command = data[0]; usb_pcwd->cmd_data_msb = data[1]; usb_pcwd->cmd_data_lsb = data[2]; /* notify anyone waiting 
that the cmd has finished */ atomic_set(&usb_pcwd->cmd_received, 1); resubmit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) pr_err("can't resubmit intr, usb_submit_urb failed with result %d\n", retval); } static int usb_pcwd_send_command(struct usb_pcwd_private *usb_pcwd, unsigned char cmd, unsigned char *msb, unsigned char *lsb) { int got_response, count; unsigned char *buf; /* We will not send any commands if the USB PCWD device does * not exist */ if ((!usb_pcwd) || (!usb_pcwd->exists)) return -1; buf = kmalloc(6, GFP_KERNEL); if (buf == NULL) return 0; /* The USB PC Watchdog uses a 6 byte report format. * The board currently uses only 3 of the six bytes of the report. */ buf[0] = cmd; /* Byte 0 = CMD */ buf[1] = *msb; /* Byte 1 = Data MSB */ buf[2] = *lsb; /* Byte 2 = Data LSB */ buf[3] = buf[4] = buf[5] = 0; /* All other bytes not used */ dev_dbg(&usb_pcwd->interface->dev, "sending following data cmd=0x%02x msb=0x%02x lsb=0x%02x", buf[0], buf[1], buf[2]); atomic_set(&usb_pcwd->cmd_received, 0); if (usb_control_msg(usb_pcwd->udev, usb_sndctrlpipe(usb_pcwd->udev, 0), HID_REQ_SET_REPORT, HID_DT_REPORT, 0x0200, usb_pcwd->interface_number, buf, 6, USB_COMMAND_TIMEOUT) != 6) { dev_dbg(&usb_pcwd->interface->dev, "usb_pcwd_send_command: error in usb_control_msg for cmd 0x%x 0x%x 0x%x\n", cmd, *msb, *lsb); } /* wait till the usb card processed the command, * with a max. timeout of USB_COMMAND_TIMEOUT */ got_response = 0; for (count = 0; (count < USB_COMMAND_TIMEOUT) && (!got_response); count++) { mdelay(1); if (atomic_read(&usb_pcwd->cmd_received)) got_response = 1; } if ((got_response) && (cmd == usb_pcwd->cmd_command)) { /* read back response */ *msb = usb_pcwd->cmd_data_msb; *lsb = usb_pcwd->cmd_data_lsb; } kfree(buf); return got_response; } static int usb_pcwd_start(struct usb_pcwd_private *usb_pcwd) { unsigned char msb = 0x00; unsigned char lsb = 0x00; int retval; /* Enable Watchdog */ retval = usb_pcwd_send_command(usb_pcwd, CMD_ENABLE_WATCHDOG, &msb, &lsb); if ((retval == 0) || (lsb == 0)) { pr_err("Card did not acknowledge enable attempt\n"); return -1; } return 0; } static int usb_pcwd_stop(struct usb_pcwd_private *usb_pcwd) { unsigned char msb = 0xA5; unsigned char lsb = 0xC3; int retval; /* Disable Watchdog */ retval = usb_pcwd_send_command(usb_pcwd, CMD_DISABLE_WATCHDOG, &msb, &lsb); if ((retval == 0) || (lsb != 0)) { pr_err("Card did not acknowledge disable attempt\n"); return -1; } return 0; } static int usb_pcwd_keepalive(struct usb_pcwd_private *usb_pcwd) { unsigned char dummy; /* Re-trigger Watchdog */ usb_pcwd_send_command(usb_pcwd, CMD_TRIGGER, &dummy, &dummy); return 0; } static int usb_pcwd_set_heartbeat(struct usb_pcwd_private *usb_pcwd, int t) { unsigned char msb = t / 256; unsigned char lsb = t % 256; if ((t < 0x0001) || (t > 0xFFFF)) return -EINVAL; /* Write new heartbeat to watchdog */ usb_pcwd_send_command(usb_pcwd, CMD_WRITE_WATCHDOG_TIMEOUT, &msb, &lsb); heartbeat = t; return 0; } static int usb_pcwd_get_temperature(struct usb_pcwd_private *usb_pcwd, int *temperature) { unsigned char msb = 0x00; unsigned char lsb = 0x00; usb_pcwd_send_command(usb_pcwd, CMD_READ_TEMP, &msb, &lsb); /* * Convert celsius to fahrenheit, since this was * the decided 'standard' for this return value. 
*/ *temperature = (lsb * 9 / 5) + 32; return 0; } static int usb_pcwd_get_timeleft(struct usb_pcwd_private *usb_pcwd, int *time_left) { unsigned char msb = 0x00; unsigned char lsb = 0x00; /* Read the time that's left before rebooting */ /* Note: if the board is not yet armed then we will read 0xFFFF */ usb_pcwd_send_command(usb_pcwd, CMD_READ_WATCHDOG_TIMEOUT, &msb, &lsb); *time_left = (msb << 8) + lsb; return 0; } /* * /dev/watchdog handling */ static ssize_t usb_pcwd_write(struct file *file, const char __user *data, size_t len, loff_t *ppos) { /* See if we got the magic character 'V' and reload the timer */ if (len) { if (!nowayout) { size_t i; /* note: just in case someone wrote the magic character * five months ago... */ expect_release = 0; /* scan to see whether or not we got the * magic character */ for (i = 0; i != len; i++) { char c; if (get_user(c, data + i)) return -EFAULT; if (c == 'V') expect_release = 42; } } /* someone wrote to us, we should reload the timer */ usb_pcwd_keepalive(usb_pcwd_device); } return len; } static long usb_pcwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; int __user *p = argp; static const struct watchdog_info ident = { .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE, .firmware_version = 1, .identity = DRIVER_NAME, }; switch (cmd) { case WDIOC_GETSUPPORT: return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0; case WDIOC_GETSTATUS: case WDIOC_GETBOOTSTATUS: return put_user(0, p); case WDIOC_GETTEMP: { int temperature; if (usb_pcwd_get_temperature(usb_pcwd_device, &temperature)) return -EFAULT; return put_user(temperature, p); } case WDIOC_SETOPTIONS: { int new_options, retval = -EINVAL; if (get_user(new_options, p)) return -EFAULT; if (new_options & WDIOS_DISABLECARD) { usb_pcwd_stop(usb_pcwd_device); retval = 0; } if (new_options & WDIOS_ENABLECARD) { usb_pcwd_start(usb_pcwd_device); retval = 0; } return retval; } case WDIOC_KEEPALIVE: usb_pcwd_keepalive(usb_pcwd_device); return 0; case WDIOC_SETTIMEOUT: { int new_heartbeat; if (get_user(new_heartbeat, p)) return -EFAULT; if (usb_pcwd_set_heartbeat(usb_pcwd_device, new_heartbeat)) return -EINVAL; usb_pcwd_keepalive(usb_pcwd_device); } fallthrough; case WDIOC_GETTIMEOUT: return put_user(heartbeat, p); case WDIOC_GETTIMELEFT: { int time_left; if (usb_pcwd_get_timeleft(usb_pcwd_device, &time_left)) return -EFAULT; return put_user(time_left, p); } default: return -ENOTTY; } } static int usb_pcwd_open(struct inode *inode, struct file *file) { /* /dev/watchdog can only be opened once */ if (test_and_set_bit(0, &is_active)) return -EBUSY; /* Activate */ usb_pcwd_start(usb_pcwd_device); usb_pcwd_keepalive(usb_pcwd_device); return stream_open(inode, file); } static int usb_pcwd_release(struct inode *inode, struct file *file) { /* * Shut off the timer. 
*/ if (expect_release == 42) { usb_pcwd_stop(usb_pcwd_device); } else { pr_crit("Unexpected close, not stopping watchdog!\n"); usb_pcwd_keepalive(usb_pcwd_device); } expect_release = 0; clear_bit(0, &is_active); return 0; } /* * /dev/temperature handling */ static ssize_t usb_pcwd_temperature_read(struct file *file, char __user *data, size_t len, loff_t *ppos) { int temperature; if (usb_pcwd_get_temperature(usb_pcwd_device, &temperature)) return -EFAULT; if (copy_to_user(data, &temperature, 1)) return -EFAULT; return 1; } static int usb_pcwd_temperature_open(struct inode *inode, struct file *file) { return stream_open(inode, file); } static int usb_pcwd_temperature_release(struct inode *inode, struct file *file) { return 0; } /* * Notify system */ static int usb_pcwd_notify_sys(struct notifier_block *this, unsigned long code, void *unused) { if (code == SYS_DOWN || code == SYS_HALT) usb_pcwd_stop(usb_pcwd_device); /* Turn the WDT off */ return NOTIFY_DONE; } /* * Kernel Interfaces */ static const struct file_operations usb_pcwd_fops = { .owner = THIS_MODULE, .write = usb_pcwd_write, .unlocked_ioctl = usb_pcwd_ioctl, .compat_ioctl = compat_ptr_ioctl, .open = usb_pcwd_open, .release = usb_pcwd_release, }; static struct miscdevice usb_pcwd_miscdev = { .minor = WATCHDOG_MINOR, .name = "watchdog", .fops = &usb_pcwd_fops, }; static const struct file_operations usb_pcwd_temperature_fops = { .owner = THIS_MODULE, .read = usb_pcwd_temperature_read, .open = usb_pcwd_temperature_open, .release = usb_pcwd_temperature_release, }; static struct miscdevice usb_pcwd_temperature_miscdev = { .minor = TEMP_MINOR, .name = "temperature", .fops = &usb_pcwd_temperature_fops, }; static struct notifier_block usb_pcwd_notifier = { .notifier_call = usb_pcwd_notify_sys, }; /* * usb_pcwd_delete */ static inline void usb_pcwd_delete(struct usb_pcwd_private *usb_pcwd) { usb_free_urb(usb_pcwd->intr_urb); usb_free_coherent(usb_pcwd->udev, usb_pcwd->intr_size, usb_pcwd->intr_buffer, usb_pcwd->intr_dma); kfree(usb_pcwd); } /* * usb_pcwd_probe * * Called by the usb core when a new device is connected that it thinks * this driver might be interested in. 
*/ static int usb_pcwd_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; struct usb_pcwd_private *usb_pcwd = NULL; int pipe; int retval = -ENOMEM; int got_fw_rev; unsigned char fw_rev_major, fw_rev_minor; char fw_ver_str[20]; unsigned char option_switches, dummy; cards_found++; if (cards_found > 1) { pr_err("This driver only supports 1 device\n"); return -ENODEV; } /* get the active interface descriptor */ iface_desc = interface->cur_altsetting; /* check out that we have a HID device */ if (!(iface_desc->desc.bInterfaceClass == USB_CLASS_HID)) { pr_err("The device isn't a Human Interface Device\n"); return -ENODEV; } if (iface_desc->desc.bNumEndpoints < 1) return -ENODEV; /* check out the endpoint: it has to be Interrupt & IN */ endpoint = &iface_desc->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) { /* we didn't find a Interrupt endpoint with direction IN */ pr_err("Couldn't find an INTR & IN endpoint\n"); return -ENODEV; } /* get a handle to the interrupt data pipe */ pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress); /* allocate memory for our device and initialize it */ usb_pcwd = kzalloc(sizeof(struct usb_pcwd_private), GFP_KERNEL); if (usb_pcwd == NULL) goto error; usb_pcwd_device = usb_pcwd; mutex_init(&usb_pcwd->mtx); usb_pcwd->udev = udev; usb_pcwd->interface = interface; usb_pcwd->interface_number = iface_desc->desc.bInterfaceNumber; usb_pcwd->intr_size = (le16_to_cpu(endpoint->wMaxPacketSize) > 8 ? le16_to_cpu(endpoint->wMaxPacketSize) : 8); /* set up the memory buffer's */ usb_pcwd->intr_buffer = usb_alloc_coherent(udev, usb_pcwd->intr_size, GFP_KERNEL, &usb_pcwd->intr_dma); if (!usb_pcwd->intr_buffer) { pr_err("Out of memory\n"); goto error; } /* allocate the urb's */ usb_pcwd->intr_urb = usb_alloc_urb(0, GFP_KERNEL); if (!usb_pcwd->intr_urb) goto error; /* initialise the intr urb's */ usb_fill_int_urb(usb_pcwd->intr_urb, udev, pipe, usb_pcwd->intr_buffer, usb_pcwd->intr_size, usb_pcwd_intr_done, usb_pcwd, endpoint->bInterval); usb_pcwd->intr_urb->transfer_dma = usb_pcwd->intr_dma; usb_pcwd->intr_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; /* register our interrupt URB with the USB system */ if (usb_submit_urb(usb_pcwd->intr_urb, GFP_KERNEL)) { pr_err("Problem registering interrupt URB\n"); retval = -EIO; /* failure */ goto error; } /* The device exists and can be communicated with */ usb_pcwd->exists = 1; /* disable card */ usb_pcwd_stop(usb_pcwd); /* Get the Firmware Version */ got_fw_rev = usb_pcwd_send_command(usb_pcwd, CMD_GET_FIRMWARE_VERSION, &fw_rev_major, &fw_rev_minor); if (got_fw_rev) sprintf(fw_ver_str, "%u.%02u", fw_rev_major, fw_rev_minor); else sprintf(fw_ver_str, "<card no answer>"); pr_info("Found card (Firmware: %s) with temp option\n", fw_ver_str); /* Get switch settings */ usb_pcwd_send_command(usb_pcwd, CMD_GET_DIP_SWITCH_SETTINGS, &dummy, &option_switches); pr_info("Option switches (0x%02x): Temperature Reset Enable=%s, Power On Delay=%s\n", option_switches, ((option_switches & 0x10) ? "ON" : "OFF"), ((option_switches & 0x08) ? 
"ON" : "OFF")); /* If heartbeat = 0 then we use the heartbeat from the dip-switches */ if (heartbeat == 0) heartbeat = heartbeat_tbl[(option_switches & 0x07)]; /* Check that the heartbeat value is within it's range ; * if not reset to the default */ if (usb_pcwd_set_heartbeat(usb_pcwd, heartbeat)) { usb_pcwd_set_heartbeat(usb_pcwd, WATCHDOG_HEARTBEAT); pr_info("heartbeat value must be 0<heartbeat<65536, using %d\n", WATCHDOG_HEARTBEAT); } retval = register_reboot_notifier(&usb_pcwd_notifier); if (retval != 0) { pr_err("cannot register reboot notifier (err=%d)\n", retval); goto error; } retval = misc_register(&usb_pcwd_temperature_miscdev); if (retval != 0) { pr_err("cannot register miscdev on minor=%d (err=%d)\n", TEMP_MINOR, retval); goto err_out_unregister_reboot; } retval = misc_register(&usb_pcwd_miscdev); if (retval != 0) { pr_err("cannot register miscdev on minor=%d (err=%d)\n", WATCHDOG_MINOR, retval); goto err_out_misc_deregister; } /* we can register the device now, as it is ready */ usb_set_intfdata(interface, usb_pcwd); pr_info("initialized. heartbeat=%d sec (nowayout=%d)\n", heartbeat, nowayout); return 0; err_out_misc_deregister: misc_deregister(&usb_pcwd_temperature_miscdev); err_out_unregister_reboot: unregister_reboot_notifier(&usb_pcwd_notifier); error: if (usb_pcwd) usb_pcwd_delete(usb_pcwd); usb_pcwd_device = NULL; return retval; } /* * usb_pcwd_disconnect * * Called by the usb core when the device is removed from the system. * * This routine guarantees that the driver will not submit any more urbs * by clearing dev->udev. */ static void usb_pcwd_disconnect(struct usb_interface *interface) { struct usb_pcwd_private *usb_pcwd; /* prevent races with open() */ mutex_lock(&disconnect_mutex); usb_pcwd = usb_get_intfdata(interface); usb_set_intfdata(interface, NULL); mutex_lock(&usb_pcwd->mtx); /* Stop the timer before we leave */ if (!nowayout) usb_pcwd_stop(usb_pcwd); /* We should now stop communicating with the USB PCWD device */ usb_pcwd->exists = 0; /* Deregister */ misc_deregister(&usb_pcwd_miscdev); misc_deregister(&usb_pcwd_temperature_miscdev); unregister_reboot_notifier(&usb_pcwd_notifier); mutex_unlock(&usb_pcwd->mtx); /* Delete the USB PCWD device */ usb_pcwd_delete(usb_pcwd); cards_found--; mutex_unlock(&disconnect_mutex); pr_info("USB PC Watchdog disconnected\n"); } module_usb_driver(usb_pcwd_driver); |
// SPDX-License-Identifier: GPL-2.0-or-later /* * Linux driver for TerraTec DMX 6Fire USB * * Firmware loader * * Author: Torsten Schenk <torsten.schenk@zoho.com> * Created: Jan 01, 2011 * Copyright: (C) Torsten Schenk */ #include <linux/firmware.h> #include <linux/module.h> #include <linux/bitrev.h> #include <linux/kernel.h> #include "firmware.h" #include "chip.h" MODULE_FIRMWARE("6fire/dmx6firel2.ihx"); MODULE_FIRMWARE("6fire/dmx6fireap.ihx"); MODULE_FIRMWARE("6fire/dmx6firecf.bin"); enum { FPGA_BUFSIZE = 512, FPGA_EP = 2 }; /* * wMaxPacketSize of pcm endpoints. * keep synced with rates_in_packet_size and rates_out_packet_size in pcm.c * fpp: frames per isopacket * * CAUTION: keep sizeof <= buffer[] in usb6fire_fw_init */ static const u8 ep_w_max_packet_size[] = { 0xe4, 0x00, 0xe4, 0x00, /* alt 1: 228 EP2 and EP6 (7 fpp) */ 0xa4, 0x01, 0xa4, 0x01, /* alt 2: 420 EP2 and EP6 (13 fpp)*/ 0x94, 0x01, 0x5c, 0x02 /* alt 3: 404 EP2 and 604 EP6 (25 fpp) */ }; static const u8 known_fw_versions[][2] = { { 0x03, 0x01 } }; struct ihex_record { u16 address; u8 len; u8 data[256]; char error; /* true if an error occurred parsing this record */ u8 max_len; /* maximum record length in whole ihex */ /* private */ const char *txt_data; unsigned int txt_length; unsigned int txt_offset; /* current position in txt_data */ }; static u8 usb6fire_fw_ihex_hex(const u8 *data, u8 *crc) { u8 val = 0; int hval; hval = hex_to_bin(data[0]); if (hval >= 0) val |= (hval << 4); hval = hex_to_bin(data[1]); if (hval >= 0) val |= hval; *crc += val; return val; } /* * returns true if record is available, false otherwise. * iff an error occurred, false will be returned and record->error will be true.
*/ static bool usb6fire_fw_ihex_next_record(struct ihex_record *record) { u8 crc = 0; u8 type; int i; record->error = false; /* find begin of record (marked by a colon) */ while (record->txt_offset < record->txt_length && record->txt_data[record->txt_offset] != ':') record->txt_offset++; if (record->txt_offset == record->txt_length) return false; /* number of characters needed for len, addr and type entries */ record->txt_offset++; if (record->txt_offset + 8 > record->txt_length) { record->error = true; return false; } record->len = usb6fire_fw_ihex_hex(record->txt_data + record->txt_offset, &crc); record->txt_offset += 2; record->address = usb6fire_fw_ihex_hex(record->txt_data + record->txt_offset, &crc) << 8; record->txt_offset += 2; record->address |= usb6fire_fw_ihex_hex(record->txt_data + record->txt_offset, &crc); record->txt_offset += 2; type = usb6fire_fw_ihex_hex(record->txt_data + record->txt_offset, &crc); record->txt_offset += 2; /* number of characters needed for data and crc entries */ if (record->txt_offset + 2 * (record->len + 1) > record->txt_length) { record->error = true; return false; } for (i = 0; i < record->len; i++) { record->data[i] = usb6fire_fw_ihex_hex(record->txt_data + record->txt_offset, &crc); record->txt_offset += 2; } usb6fire_fw_ihex_hex(record->txt_data + record->txt_offset, &crc); if (crc) { record->error = true; return false; } if (type == 1 || !record->len) /* eof */ return false; else if (type == 0) return true; else { record->error = true; return false; } } static int usb6fire_fw_ihex_init(const struct firmware *fw, struct ihex_record *record) { record->txt_data = fw->data; record->txt_length = fw->size; record->txt_offset = 0; record->max_len = 0; /* read all records, if loop ends, record->error indicates, * whether ihex is valid. 
*/ while (usb6fire_fw_ihex_next_record(record)) record->max_len = max(record->len, record->max_len); if (record->error) return -EINVAL; record->txt_offset = 0; return 0; } static int usb6fire_fw_ezusb_write(struct usb_device *device, int type, int value, char *data, int len) { return usb_control_msg_send(device, 0, type, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, 0, data, len, 1000, GFP_KERNEL); } static int usb6fire_fw_ezusb_read(struct usb_device *device, int type, int value, char *data, int len) { return usb_control_msg_recv(device, 0, type, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, 0, data, len, 1000, GFP_KERNEL); } static int usb6fire_fw_fpga_write(struct usb_device *device, char *data, int len) { int actual_len; int ret; ret = usb_bulk_msg(device, usb_sndbulkpipe(device, FPGA_EP), data, len, &actual_len, 1000); if (ret < 0) return ret; else if (actual_len != len) return -EIO; return 0; } static int usb6fire_fw_ezusb_upload( struct usb_interface *intf, const char *fwname, unsigned int postaddr, u8 *postdata, unsigned int postlen) { int ret; u8 data; struct usb_device *device = interface_to_usbdev(intf); const struct firmware *fw = NULL; struct ihex_record *rec = kmalloc(sizeof(struct ihex_record), GFP_KERNEL); if (!rec) return -ENOMEM; ret = request_firmware(&fw, fwname, &device->dev); if (ret < 0) { kfree(rec); dev_err(&intf->dev, "error requesting ezusb firmware %s.\n", fwname); return ret; } ret = usb6fire_fw_ihex_init(fw, rec); if (ret < 0) { kfree(rec); release_firmware(fw); dev_err(&intf->dev, "error validating ezusb firmware %s.\n", fwname); return ret; } /* upload firmware image */ data = 0x01; /* stop ezusb cpu */ ret = usb6fire_fw_ezusb_write(device, 0xa0, 0xe600, &data, 1); if (ret) { kfree(rec); release_firmware(fw); dev_err(&intf->dev, "unable to upload ezusb firmware %s: begin message.\n", fwname); return ret; } while (usb6fire_fw_ihex_next_record(rec)) { /* write firmware */ ret = usb6fire_fw_ezusb_write(device, 0xa0, rec->address, rec->data, rec->len); if (ret) { kfree(rec); release_firmware(fw); dev_err(&intf->dev, "unable to upload ezusb firmware %s: data urb.\n", fwname); return ret; } } release_firmware(fw); kfree(rec); if (postdata) { /* write data after firmware has been uploaded */ ret = usb6fire_fw_ezusb_write(device, 0xa0, postaddr, postdata, postlen); if (ret) { dev_err(&intf->dev, "unable to upload ezusb firmware %s: post urb.\n", fwname); return ret; } } data = 0x00; /* resume ezusb cpu */ ret = usb6fire_fw_ezusb_write(device, 0xa0, 0xe600, &data, 1); if (ret) { dev_err(&intf->dev, "unable to upload ezusb firmware %s: end message.\n", fwname); return ret; } return 0; } static int usb6fire_fw_fpga_upload( struct usb_interface *intf, const char *fwname) { int ret; int i; struct usb_device *device = interface_to_usbdev(intf); u8 *buffer = kmalloc(FPGA_BUFSIZE, GFP_KERNEL); const char *c; const char *end; const struct firmware *fw; if (!buffer) return -ENOMEM; ret = request_firmware(&fw, fwname, &device->dev); if (ret < 0) { dev_err(&intf->dev, "unable to get fpga firmware %s.\n", fwname); kfree(buffer); return -EIO; } c = fw->data; end = fw->data + fw->size; ret = usb6fire_fw_ezusb_write(device, 8, 0, NULL, 0); if (ret) { kfree(buffer); release_firmware(fw); dev_err(&intf->dev, "unable to upload fpga firmware: begin urb.\n"); return ret; } while (c != end) { for (i = 0; c != end && i < FPGA_BUFSIZE; i++, c++) buffer[i] = bitrev8((u8)*c); ret = usb6fire_fw_fpga_write(device, buffer, i); if (ret < 0) { release_firmware(fw); 
kfree(buffer); dev_err(&intf->dev, "unable to upload fpga firmware: fw urb.\n"); return ret; } } release_firmware(fw); kfree(buffer); ret = usb6fire_fw_ezusb_write(device, 9, 0, NULL, 0); if (ret) { dev_err(&intf->dev, "unable to upload fpga firmware: end urb.\n"); return ret; } return 0; } /* check, if the firmware version the devices has currently loaded * is known by this driver. 'version' needs to have 4 bytes version * info data. */ static int usb6fire_fw_check(struct usb_interface *intf, const u8 *version) { int i; for (i = 0; i < ARRAY_SIZE(known_fw_versions); i++) if (!memcmp(version, known_fw_versions + i, 2)) return 0; dev_err(&intf->dev, "invalid firmware version in device: %4ph. " "please reconnect to power. if this failure " "still happens, check your firmware installation.", version); return -EINVAL; } int usb6fire_fw_init(struct usb_interface *intf) { int i; int ret; struct usb_device *device = interface_to_usbdev(intf); /* buffer: 8 receiving bytes from device and * sizeof(EP_W_MAX_PACKET_SIZE) bytes for non-const copy */ u8 buffer[12]; ret = usb6fire_fw_ezusb_read(device, 1, 0, buffer, 8); if (ret) { dev_err(&intf->dev, "unable to receive device firmware state.\n"); return ret; } if (buffer[0] != 0xeb || buffer[1] != 0xaa || buffer[2] != 0x55) { dev_err(&intf->dev, "unknown device firmware state received from device:"); for (i = 0; i < 8; i++) printk(KERN_CONT "%02x ", buffer[i]); printk(KERN_CONT "\n"); return -EIO; } /* do we need fpga loader ezusb firmware? */ if (buffer[3] == 0x01) { ret = usb6fire_fw_ezusb_upload(intf, "6fire/dmx6firel2.ihx", 0, NULL, 0); if (ret < 0) return ret; return FW_NOT_READY; } /* do we need fpga firmware and application ezusb firmware? */ else if (buffer[3] == 0x02) { ret = usb6fire_fw_check(intf, buffer + 4); if (ret < 0) return ret; ret = usb6fire_fw_fpga_upload(intf, "6fire/dmx6firecf.bin"); if (ret < 0) return ret; memcpy(buffer, ep_w_max_packet_size, sizeof(ep_w_max_packet_size)); ret = usb6fire_fw_ezusb_upload(intf, "6fire/dmx6fireap.ihx", 0x0003, buffer, sizeof(ep_w_max_packet_size)); if (ret < 0) return ret; return FW_NOT_READY; } /* all fw loaded? */ else if (buffer[3] == 0x03) return usb6fire_fw_check(intf, buffer + 4); /* unknown data? */ else { dev_err(&intf->dev, "unknown device firmware state received from device: "); for (i = 0; i < 8; i++) printk(KERN_CONT "%02x ", buffer[i]); printk(KERN_CONT "\n"); return -EIO; } return 0; } |
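The loader's entry point usb6fire_fw_init() distinguishes three device states via byte 3 of the status buffer and returns FW_NOT_READY after uploading a firmware stage, signalling that the card is not yet usable. Below is a minimal sketch of how a probe routine might consume that return value; usb6fire_fw_init() and FW_NOT_READY come from this file and firmware.h, while my_6fire_probe(), my_6fire_setup_card() and the assumption that the EZ-USB part re-enumerates after the upload are illustrative.

```c
/* Hypothetical probe fragment: run the firmware loader first and only
 * continue with normal card setup once all firmware stages are present.
 * usb6fire_fw_init() and FW_NOT_READY are taken from firmware.c/firmware.h;
 * my_6fire_probe() and my_6fire_setup_card() are illustrative stand-ins.
 */
static int my_6fire_probe(struct usb_interface *intf,
			  const struct usb_device_id *id)
{
	int ret;

	ret = usb6fire_fw_init(intf);
	if (ret < 0)
		return ret;	/* loader failed, abort probe */
	if (ret == FW_NOT_READY)
		return 0;	/* firmware uploaded; wait for the device
				 * to show up again and probe once more */

	return my_6fire_setup_card(intf);	/* all firmware present */
}
```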
| 41 77 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * ALSA sequencer Memory Manager * Copyright (c) 1998 by Frank van de Pol <fvdpol@coil.demon.nl> */ #ifndef __SND_SEQ_MEMORYMGR_H #define __SND_SEQ_MEMORYMGR_H #include <sound/seq_kernel.h> #include <linux/poll.h> struct snd_info_buffer; /* aliasing for legacy and UMP event packet handling */ union __snd_seq_event { struct snd_seq_event legacy; #if IS_ENABLED(CONFIG_SND_SEQ_UMP) struct snd_seq_ump_event ump; #endif struct { struct snd_seq_event event; #if IS_ENABLED(CONFIG_SND_SEQ_UMP) u32 extra; #endif } __packed raw; }; /* container for sequencer event (internal use) */ struct snd_seq_event_cell { union { struct snd_seq_event event; union __snd_seq_event ump; }; struct snd_seq_pool *pool; /* used pool */ struct snd_seq_event_cell *next; /* next cell */ }; /* design note: the pool is a contiguous block of memory, if we dynamicly want to add additional cells to the pool be better store this in another pool as we need to know the base address of the pool when releasing memory. */ struct snd_seq_pool { struct snd_seq_event_cell *ptr; /* pointer to first event chunk */ struct snd_seq_event_cell *free; /* pointer to the head of the free list */ int total_elements; /* pool size actually allocated */ atomic_t counter; /* cells free */ int size; /* pool size to be allocated */ int room; /* watermark for sleep/wakeup */ int closing; /* statistics */ int max_used; int event_alloc_nopool; int event_alloc_failures; int event_alloc_success; /* Write locking */ wait_queue_head_t output_sleep; /* Pool lock */ spinlock_t lock; }; void snd_seq_cell_free(struct snd_seq_event_cell *cell); int snd_seq_event_dup(struct snd_seq_pool *pool, struct snd_seq_event *event, struct snd_seq_event_cell **cellp, int nonblock, struct file *file, struct mutex *mutexp); /* return number of unused (free) cells */ static inline int snd_seq_unused_cells(struct snd_seq_pool *pool) { return pool ? pool->total_elements - atomic_read(&pool->counter) : 0; } /* return total number of allocated cells */ static inline int snd_seq_total_cells(struct snd_seq_pool *pool) { return pool ? pool->total_elements : 0; } /* init pool - allocate events */ int snd_seq_pool_init(struct snd_seq_pool *pool); /* done pool - free events */ void snd_seq_pool_mark_closing(struct snd_seq_pool *pool); int snd_seq_pool_done(struct snd_seq_pool *pool); /* create pool */ struct snd_seq_pool *snd_seq_pool_new(int poolsize); /* remove pool */ int snd_seq_pool_delete(struct snd_seq_pool **pool); /* polling */ int snd_seq_pool_poll_wait(struct snd_seq_pool *pool, struct file *file, poll_table *wait); void snd_seq_info_pool(struct snd_info_buffer *buffer, struct snd_seq_pool *pool, char *space); #endif |
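The header above exposes the whole pool lifecycle (snd_seq_pool_new/init, mark_closing/done, delete) plus the cell-accounting helpers. A minimal sketch of that lifecycle follows, assuming ordinary process context; the pool size of 500 cells is an arbitrary illustrative value and example_pool_round_trip() is not part of the ALSA code.

```c
/* Illustrative pool lifecycle using only functions declared above.
 * The pool size (500 cells) is an arbitrary example value.
 */
static int example_pool_round_trip(void)
{
	struct snd_seq_pool *pool;
	int err;

	pool = snd_seq_pool_new(500);		/* allocate control structure */
	if (!pool)
		return -ENOMEM;

	err = snd_seq_pool_init(pool);		/* allocate the event cells */
	if (err < 0) {
		snd_seq_pool_delete(&pool);
		return err;
	}

	pr_info("cells: %d total, %d unused\n",
		snd_seq_total_cells(pool), snd_seq_unused_cells(pool));

	snd_seq_pool_mark_closing(pool);	/* refuse new allocations */
	snd_seq_pool_done(pool);		/* wait for and free cells */
	snd_seq_pool_delete(&pool);		/* free the pool itself */
	return 0;
}
```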
| 3 3 3 3 3 3 3 3 3 3 3 3 3 3 6 6 6 6 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | /* * Route Plug-In * Copyright (c) 2000 by Abramo Bagnara <abramo@alsa-project.org> * * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Library General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include <linux/time.h> #include <sound/core.h> #include <sound/pcm.h> #include "pcm_plugin.h" static void zero_areas(struct snd_pcm_plugin_channel *dvp, int ndsts, snd_pcm_uframes_t frames, snd_pcm_format_t format) { int dst = 0; for (; dst < ndsts; ++dst) { if (dvp->wanted) snd_pcm_area_silence(&dvp->area, 0, frames, format); dvp->enabled = 0; dvp++; } } static inline void copy_area(const struct snd_pcm_plugin_channel *src_channel, struct snd_pcm_plugin_channel *dst_channel, snd_pcm_uframes_t frames, snd_pcm_format_t format) { dst_channel->enabled = 1; snd_pcm_area_copy(&src_channel->area, 0, &dst_channel->area, 0, frames, format); } static snd_pcm_sframes_t route_transfer(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t frames) { int nsrcs, ndsts, dst; struct snd_pcm_plugin_channel *dvp; snd_pcm_format_t format; if (snd_BUG_ON(!plugin || !src_channels || !dst_channels)) return -ENXIO; if (frames == 0) return 0; if (frames > dst_channels[0].frames) frames = dst_channels[0].frames; nsrcs = plugin->src_format.channels; ndsts = plugin->dst_format.channels; format = plugin->dst_format.format; dvp = dst_channels; if (nsrcs <= 1) { /* expand to all channels */ for (dst = 0; dst < ndsts; ++dst) { copy_area(src_channels, dvp, frames, format); dvp++; } return frames; } for (dst = 0; dst < ndsts && dst < nsrcs; ++dst) { copy_area(src_channels, dvp, frames, format); dvp++; src_channels++; } if (dst < ndsts) zero_areas(dvp, ndsts - dst, frames, format); return frames; } int snd_pcm_plugin_build_route(struct snd_pcm_substream *plug, struct snd_pcm_plugin_format *src_format, struct snd_pcm_plugin_format *dst_format, struct snd_pcm_plugin **r_plugin) { struct snd_pcm_plugin *plugin; int err; if (snd_BUG_ON(!r_plugin)) return -ENXIO; *r_plugin = NULL; if (snd_BUG_ON(src_format->rate != dst_format->rate)) return -ENXIO; if (snd_BUG_ON(src_format->format != dst_format->format)) return -ENXIO; err = snd_pcm_plugin_build(plug, "route conversion", src_format, dst_format, 0, &plugin); if (err < 0) return err; plugin->transfer = route_transfer; *r_plugin = plugin; return 0; } |
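route_transfer() either fans a single source channel out to every destination or copies channels one-to-one and silences the leftovers, and snd_pcm_plugin_build_route() insists that rate and sample format already match. A minimal sketch of building such a plugin for a mono-to-stereo conversion; the caller's substream pointer is assumed to exist and the concrete rate/format values are illustrative.

```c
/* Illustrative use of snd_pcm_plugin_build_route(): same rate and format on
 * both sides, one source channel expanded to two destination channels.
 * "plug" (the substream) is assumed to be provided by the caller.
 */
static int example_build_mono_to_stereo(struct snd_pcm_substream *plug,
					struct snd_pcm_plugin **r_plugin)
{
	struct snd_pcm_plugin_format src = {
		.format   = SNDRV_PCM_FORMAT_S16_LE,
		.rate     = 48000,
		.channels = 1,
	};
	struct snd_pcm_plugin_format dst = src;

	dst.channels = 2;	/* route_transfer() copies the single source
				 * channel into both destination channels */
	return snd_pcm_plugin_build_route(plug, &src, &dst, r_plugin);
}
```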
| 136 136 135 133 135 343 137 350 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 | // SPDX-License-Identifier: GPL-2.0-only /* * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org> */ #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <net/sock.h> #include <net/route.h> #include <linux/ip.h> #include <net/ip.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("iptables mangle table"); #define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ (1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_MANGLE, }; static unsigned int ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { unsigned int ret, verdict; const struct iphdr *iph; __be32 saddr, daddr; u32 mark; int err; u8 tos; /* Save things which could affect route */ mark = skb->mark; iph = ip_hdr(skb); saddr = iph->saddr; daddr = iph->daddr; tos = iph->tos; ret = ipt_do_table(priv, skb, state); verdict = ret & NF_VERDICT_MASK; /* Reroute for ANY change. */ if (verdict != NF_DROP && verdict != NF_STOLEN) { iph = ip_hdr(skb); if (iph->saddr != saddr || iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } } return ret; } /* The work comes in here from netfilter.c. 
*/ static unsigned int iptable_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT) return ipt_mangle_out(priv, skb, state); return ipt_do_table(priv, skb, state); } static struct nf_hook_ops *mangle_ops __read_mostly; static int iptable_mangle_table_init(struct net *net) { struct ipt_replace *repl; int ret; repl = ipt_alloc_initial_table(&packet_mangler); if (repl == NULL) return -ENOMEM; ret = ipt_register_table(net, &packet_mangler, repl, mangle_ops); kfree(repl); return ret; } static void __net_exit iptable_mangle_net_pre_exit(struct net *net) { ipt_unregister_table_pre_exit(net, "mangle"); } static void __net_exit iptable_mangle_net_exit(struct net *net) { ipt_unregister_table_exit(net, "mangle"); } static struct pernet_operations iptable_mangle_net_ops = { .pre_exit = iptable_mangle_net_pre_exit, .exit = iptable_mangle_net_exit, }; static int __init iptable_mangle_init(void) { int ret = xt_register_template(&packet_mangler, iptable_mangle_table_init); if (ret < 0) return ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); if (IS_ERR(mangle_ops)) { xt_unregister_template(&packet_mangler); ret = PTR_ERR(mangle_ops); return ret; } ret = register_pernet_subsys(&iptable_mangle_net_ops); if (ret < 0) { xt_unregister_template(&packet_mangler); kfree(mangle_ops); return ret; } return ret; } static void __exit iptable_mangle_fini(void) { unregister_pernet_subsys(&iptable_mangle_net_ops); xt_unregister_template(&packet_mangler); kfree(mangle_ops); } module_init(iptable_mangle_init); module_exit(iptable_mangle_fini); |
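ipt_mangle_out() snapshots the fields that influence routing (mark, addresses, TOS) before running the table and calls ip_route_me_harder() only when one of them changed. The sketch below shows the same reroute-on-change pattern for a hypothetical LOCAL_OUT hook that rewrites the TOS field; my_rewrite_tos() and my_out_hook() are illustrative, and real code would also make the header writable before modifying it.

```c
/* Illustrative LOCAL_OUT hook following the same pattern as ipt_mangle_out():
 * snapshot route-relevant fields, let the packet be modified, re-route only
 * when something changed. my_rewrite_tos() is a hypothetical mangling step.
 */
static void my_rewrite_tos(struct sk_buff *skb)
{
	ip_hdr(skb)->tos |= 0x10;	/* hypothetical TOS change */
}

static unsigned int my_out_hook(void *priv, struct sk_buff *skb,
				const struct nf_hook_state *state)
{
	u8 old_tos = ip_hdr(skb)->tos;

	my_rewrite_tos(skb);

	if (ip_hdr(skb)->tos != old_tos &&
	    ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC) < 0)
		return NF_DROP;

	return NF_ACCEPT;
}
```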
| 18 18 18 18 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | // SPDX-License-Identifier: GPL-2.0-or-later /* Signature verification with an asymmetric key * * See Documentation/crypto/asymmetric-keys.rst * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) "SIG: "fmt #include <keys/asymmetric-subtype.h> #include <linux/export.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/keyctl.h> #include <crypto/public_key.h> #include <keys/user-type.h> #include "asymmetric_keys.h" /* * Destroy a public key signature. */ void public_key_signature_free(struct public_key_signature *sig) { int i; if (sig) { for (i = 0; i < ARRAY_SIZE(sig->auth_ids); i++) kfree(sig->auth_ids[i]); kfree(sig->s); kfree(sig->digest); kfree(sig); } } EXPORT_SYMBOL_GPL(public_key_signature_free); /** * query_asymmetric_key - Get information about an asymmetric key. * @params: Various parameters. * @info: Where to put the information. */ int query_asymmetric_key(const struct kernel_pkey_params *params, struct kernel_pkey_query *info) { const struct asymmetric_key_subtype *subtype; struct key *key = params->key; int ret; pr_devel("==>%s()\n", __func__); if (key->type != &key_type_asymmetric) return -EINVAL; subtype = asymmetric_key_subtype(key); if (!subtype || !key->payload.data[0]) return -EINVAL; if (!subtype->query) return -ENOTSUPP; ret = subtype->query(params, info); pr_devel("<==%s() = %d\n", __func__, ret); return ret; } EXPORT_SYMBOL_GPL(query_asymmetric_key); /** * verify_signature - Initiate the use of an asymmetric key to verify a signature * @key: The asymmetric key to verify against * @sig: The signature to check * * Returns 0 if successful or else an error. */ int verify_signature(const struct key *key, const struct public_key_signature *sig) { const struct asymmetric_key_subtype *subtype; int ret; pr_devel("==>%s()\n", __func__); if (key->type != &key_type_asymmetric) return -EINVAL; subtype = asymmetric_key_subtype(key); if (!subtype || !key->payload.data[0]) return -EINVAL; if (!subtype->verify_signature) return -ENOTSUPP; ret = subtype->verify_signature(key, sig); pr_devel("<==%s() = %d\n", __func__, ret); return ret; } EXPORT_SYMBOL_GPL(verify_signature); |
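verify_signature() only dispatches to the key subtype's ->verify_signature() hook, so the caller supplies an asymmetric key plus a filled-in struct public_key_signature. A minimal sketch of such a caller, assuming the key reference, raw signature bytes and precomputed digest already exist; the field and algorithm names follow struct public_key_signature from <crypto/public_key.h>, and example_check_sig() is illustrative.

```c
/* Illustrative caller of verify_signature(). The key reference, signature
 * bytes and digest are assumed to be provided by the caller; field names
 * follow struct public_key_signature from <crypto/public_key.h>.
 */
static int example_check_sig(const struct key *key,
			     u8 *sig_bytes, u32 sig_len,
			     u8 *digest, u8 digest_len)
{
	struct public_key_signature sig = {
		.s		= sig_bytes,
		.s_size		= sig_len,
		.digest		= digest,
		.digest_size	= digest_len,
		.pkey_algo	= "rsa",
		.hash_algo	= "sha256",
		.encoding	= "pkcs1",
	};

	return verify_signature(key, &sig);	/* 0 on success, else -errno */
}
```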
| 138 138 1 138 138 22 137 138 138 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Resilient Queued Spin Lock * * (C) Copyright 2024-2025 Meta Platforms, Inc. and affiliates. * * Authors: Kumar Kartikeya Dwivedi <memxor@gmail.com> */ #ifndef __ASM_GENERIC_RQSPINLOCK_H #define __ASM_GENERIC_RQSPINLOCK_H #include <linux/types.h> #include <vdso/time64.h> #include <linux/percpu.h> #ifdef CONFIG_QUEUED_SPINLOCKS #include <asm/qspinlock.h> #endif struct rqspinlock { union { atomic_t val; u32 locked; }; }; /* Even though this is same as struct rqspinlock, we need to emit a distinct * type in BTF for BPF programs. */ struct bpf_res_spin_lock { u32 val; }; struct qspinlock; #ifdef CONFIG_QUEUED_SPINLOCKS typedef struct qspinlock rqspinlock_t; #else typedef struct rqspinlock rqspinlock_t; #endif extern int resilient_tas_spin_lock(rqspinlock_t *lock); #ifdef CONFIG_QUEUED_SPINLOCKS extern int resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val); #endif #ifndef resilient_virt_spin_lock_enabled static __always_inline bool resilient_virt_spin_lock_enabled(void) { return false; } #endif #ifndef resilient_virt_spin_lock static __always_inline int resilient_virt_spin_lock(rqspinlock_t *lock) { return 0; } #endif /* * Default timeout for waiting loops is 0.25 seconds */ #define RES_DEF_TIMEOUT (NSEC_PER_SEC / 4) /* * Choose 31 as it makes rqspinlock_held cacheline-aligned. */ #define RES_NR_HELD 31 struct rqspinlock_held { int cnt; void *locks[RES_NR_HELD]; }; DECLARE_PER_CPU_ALIGNED(struct rqspinlock_held, rqspinlock_held_locks); static __always_inline void grab_held_lock_entry(void *lock) { int cnt = this_cpu_inc_return(rqspinlock_held_locks.cnt); if (unlikely(cnt > RES_NR_HELD)) { /* Still keep the inc so we decrement later. */ return; } /* * Implied compiler barrier in per-CPU operations; otherwise we can have * the compiler reorder inc with write to table, allowing interrupts to * overwrite and erase our write to the table (as on interrupt exit it * will be reset to NULL). * * It is fine for cnt inc to be reordered wrt remote readers though, * they won't observe our entry until the cnt update is visible, that's * all. */ this_cpu_write(rqspinlock_held_locks.locks[cnt - 1], lock); } /* * We simply don't support out-of-order unlocks, and keep the logic simple here. * The verifier prevents BPF programs from unlocking out-of-order, and the same * holds for in-kernel users. * * It is possible to run into misdetection scenarios of AA deadlocks on the same * CPU, and missed ABBA deadlocks on remote CPUs if this function pops entries * out of order (due to lock A, lock B, unlock A, unlock B) pattern. 
The correct * logic to preserve right entries in the table would be to walk the array of * held locks and swap and clear out-of-order entries, but that's too * complicated and we don't have a compelling use case for out of order unlocking. */ static __always_inline void release_held_lock_entry(void) { struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks); if (unlikely(rqh->cnt > RES_NR_HELD)) goto dec; WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL); dec: /* * Reordering of clearing above with inc and its write in * grab_held_lock_entry that came before us (in same acquisition * attempt) is ok, we either see a valid entry or NULL when it's * visible. * * But this helper is invoked when we unwind upon failing to acquire the * lock. Unlike the unlock path which constitutes a release store after * we clear the entry, we need to emit a write barrier here. Otherwise, * we may have a situation as follows: * * <error> for lock B * release_held_lock_entry * * try_cmpxchg_acquire for lock A * grab_held_lock_entry * * Lack of any ordering means reordering may occur such that dec, inc * are done before entry is overwritten. This permits a remote lock * holder of lock B (which this CPU failed to acquire) to now observe it * as being attempted on this CPU, and may lead to misdetection (if this * CPU holds a lock it is attempting to acquire, leading to false ABBA * diagnosis). * * In case of unlock, we will always do a release on the lock word after * releasing the entry, ensuring that other CPUs cannot hold the lock * (and make conclusions about deadlocks) until the entry has been * cleared on the local CPU, preventing any anomalies. Reordering is * still possible there, but a remote CPU cannot observe a lock in our * table which it is already holding, since visibility entails our * release store for the said lock has not retired. * * In theory we don't have a problem if the dec and WRITE_ONCE above get * reordered with each other, we either notice an empty NULL entry on * top (if dec succeeds WRITE_ONCE), or a potentially stale entry which * cannot be observed (if dec precedes WRITE_ONCE). * * Emit the write barrier _before_ the dec, this permits dec-inc * reordering but that is harmless as we'd have new entry set to NULL * already, i.e. they cannot precede the NULL store above. */ smp_wmb(); this_cpu_dec(rqspinlock_held_locks.cnt); } #ifdef CONFIG_QUEUED_SPINLOCKS /** * res_spin_lock - acquire a queued spinlock * @lock: Pointer to queued spinlock structure * * Return: * * 0 - Lock was acquired successfully. * * -EDEADLK - Lock acquisition failed because of AA/ABBA deadlock. * * -ETIMEDOUT - Lock acquisition failed because of timeout. */ static __always_inline int res_spin_lock(rqspinlock_t *lock) { int val = 0; if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) { grab_held_lock_entry(lock); return 0; } return resilient_queued_spin_lock_slowpath(lock, val); } #else #define res_spin_lock(lock) resilient_tas_spin_lock(lock) #endif /* CONFIG_QUEUED_SPINLOCKS */ static __always_inline void res_spin_unlock(rqspinlock_t *lock) { struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks); if (unlikely(rqh->cnt > RES_NR_HELD)) goto unlock; WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL); unlock: /* * Release barrier, ensures correct ordering. See release_held_lock_entry * for details. Perform release store instead of queued_spin_unlock, * since we use this function for test-and-set fallback as well. When we * have CONFIG_QUEUED_SPINLOCKS=n, we clear the full 4-byte lockword. 
* * Like release_held_lock_entry, we can do the release before the dec. * We simply care about not seeing the 'lock' in our table from a remote * CPU once the lock has been released, which doesn't rely on the dec. * * Unlike smp_wmb(), release is not a two way fence, hence it is * possible for a inc to move up and reorder with our clearing of the * entry. This isn't a problem however, as for a misdiagnosis of ABBA, * the remote CPU needs to hold this lock, which won't be released until * the store below is done, which would ensure the entry is overwritten * to NULL, etc. */ smp_store_release(&lock->locked, 0); this_cpu_dec(rqspinlock_held_locks.cnt); } #ifdef CONFIG_QUEUED_SPINLOCKS #define raw_res_spin_lock_init(lock) ({ *(lock) = (rqspinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; }) #else #define raw_res_spin_lock_init(lock) ({ *(lock) = (rqspinlock_t){0}; }) #endif #define raw_res_spin_lock(lock) \ ({ \ int __ret; \ preempt_disable(); \ __ret = res_spin_lock(lock); \ if (__ret) \ preempt_enable(); \ __ret; \ }) #define raw_res_spin_unlock(lock) ({ res_spin_unlock(lock); preempt_enable(); }) #define raw_res_spin_lock_irqsave(lock, flags) \ ({ \ int __ret; \ local_irq_save(flags); \ __ret = raw_res_spin_lock(lock); \ if (__ret) \ local_irq_restore(flags); \ __ret; \ }) #define raw_res_spin_unlock_irqrestore(lock, flags) ({ raw_res_spin_unlock(lock); local_irq_restore(flags); }) #endif /* __ASM_GENERIC_RQSPINLOCK_H */ |
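Unlike an ordinary spinlock, raw_res_spin_lock() can fail, so every call site has to check the return value and must skip the unlock when acquisition failed. A minimal sketch of guarding a hypothetical object with a rqspinlock_t, using the irqsave variants defined above; struct my_obj and its functions are illustrative.

```c
/* Illustrative use of the resilient spinlock API declared above.
 * "my_obj" is hypothetical; the error handling shows the required pattern:
 * only unlock when the lock call returned 0.
 */
struct my_obj {
	rqspinlock_t lock;
	u64 counter;
};

static void my_obj_init(struct my_obj *obj)
{
	raw_res_spin_lock_init(&obj->lock);
	obj->counter = 0;
}

static int my_obj_bump(struct my_obj *obj)
{
	unsigned long flags;
	int ret;

	ret = raw_res_spin_lock_irqsave(&obj->lock, flags);
	if (ret)
		return ret;	/* -EDEADLK or -ETIMEDOUT, lock not held */

	obj->counter++;
	raw_res_spin_unlock_irqrestore(&obj->lock, flags);
	return 0;
}
```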
| 1 1 1 1 4 4 4 4 4 1 1 4 1 4 3 3 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | // SPDX-License-Identifier: GPL-2.0-or-later /* * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org> * (C) 2012 by Vyatta Inc. <http://www.vyatta.com> */ #include <linux/types.h> #include <linux/netfilter.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <linux/stddef.h> #include <linux/err.h> #include <linux/percpu.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <linux/export.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_timeout.h> const struct nf_ct_timeout_hooks __rcu *nf_ct_timeout_hook __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_timeout_hook); static int untimeout(struct nf_conn *ct, void *timeout) { struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); if (timeout_ext) { const struct nf_ct_timeout *t; t = rcu_access_pointer(timeout_ext->timeout); if (!timeout || t == timeout) RCU_INIT_POINTER(timeout_ext->timeout, NULL); } /* We are not intended to delete this conntrack. */ return 0; } void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout) { struct nf_ct_iter_data iter_data = { .net = net, .data = timeout, }; nf_ct_iterate_cleanup_net(untimeout, &iter_data); } EXPORT_SYMBOL_GPL(nf_ct_untimeout); static void __nf_ct_timeout_put(struct nf_ct_timeout *timeout) { const struct nf_ct_timeout_hooks *h = rcu_dereference(nf_ct_timeout_hook); if (h) h->timeout_put(timeout); } int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, const char *timeout_name) { const struct nf_ct_timeout_hooks *h; struct nf_ct_timeout *timeout; struct nf_conn_timeout *timeout_ext; const char *errmsg = NULL; int ret = 0; rcu_read_lock(); h = rcu_dereference(nf_ct_timeout_hook); if (!h) { ret = -ENOENT; errmsg = "Timeout policy base is empty"; goto out; } timeout = h->timeout_find_get(net, timeout_name); if (!timeout) { ret = -ENOENT; pr_info_ratelimited("No such timeout policy \"%s\"\n", timeout_name); goto out; } if (timeout->l3num != l3num) { ret = -EINVAL; pr_info_ratelimited("Timeout policy `%s' can only be used by " "L%d protocol number %d\n", timeout_name, 3, timeout->l3num); goto err_put_timeout; } /* Make sure the timeout policy matches any existing protocol tracker, * otherwise default to generic. 
*/ if (timeout->l4proto->l4proto != l4num) { ret = -EINVAL; pr_info_ratelimited("Timeout policy `%s' can only be used by " "L%d protocol number %d\n", timeout_name, 4, timeout->l4proto->l4proto); goto err_put_timeout; } timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC); if (!timeout_ext) { ret = -ENOMEM; goto err_put_timeout; } rcu_read_unlock(); return ret; err_put_timeout: __nf_ct_timeout_put(timeout); out: rcu_read_unlock(); if (errmsg) pr_info_ratelimited("%s\n", errmsg); return ret; } EXPORT_SYMBOL_GPL(nf_ct_set_timeout); void nf_ct_destroy_timeout(struct nf_conn *ct) { struct nf_conn_timeout *timeout_ext; const struct nf_ct_timeout_hooks *h; rcu_read_lock(); h = rcu_dereference(nf_ct_timeout_hook); if (h) { timeout_ext = nf_ct_timeout_find(ct); if (timeout_ext) { struct nf_ct_timeout *t; t = rcu_dereference(timeout_ext->timeout); if (t) h->timeout_put(t); RCU_INIT_POINTER(timeout_ext->timeout, NULL); } } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_ct_destroy_timeout); |
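nf_ct_set_timeout() looks up a named policy through the registered hooks, verifies that its L3/L4 protocol numbers match, and attaches it to the conntrack as an extension; nf_ct_destroy_timeout() drops the reference again. A minimal sketch of attaching a policy to a tracked TCP/IPv4 connection; the policy name "my-tcp-policy" and the example_* helpers are illustrative, and the caller is assumed to hold valid net and ct pointers.

```c
/* Illustrative caller of nf_ct_set_timeout(): attach the hypothetical policy
 * "my-tcp-policy" to a TCP/IPv4 conntrack entry. The extension allocation
 * inside nf_ct_set_timeout() uses GFP_ATOMIC, so packet context is fine.
 */
static int example_attach_policy(struct net *net, struct nf_conn *ct)
{
	return nf_ct_set_timeout(net, ct, NFPROTO_IPV4, IPPROTO_TCP,
				 "my-tcp-policy");
	/* returns 0, or -ENOENT/-EINVAL/-ENOMEM as in the function above */
}

/* When the entry is torn down, the policy reference is dropped again: */
static void example_detach_policy(struct nf_conn *ct)
{
	nf_ct_destroy_timeout(ct);
}
```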
| 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 | // SPDX-License-Identifier: GPL-2.0-only /* * NFC Digital Protocol stack * Copyright (c) 2013, Intel Corporation. 
*/ #define pr_fmt(fmt) "digital: %s: " fmt, __func__ #include <linux/module.h> #include "digital.h" #define DIGITAL_PROTO_NFCA_RF_TECH \ (NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | \ NFC_PROTO_NFC_DEP_MASK | NFC_PROTO_ISO14443_MASK) #define DIGITAL_PROTO_NFCB_RF_TECH NFC_PROTO_ISO14443_B_MASK #define DIGITAL_PROTO_NFCF_RF_TECH \ (NFC_PROTO_FELICA_MASK | NFC_PROTO_NFC_DEP_MASK) #define DIGITAL_PROTO_ISO15693_RF_TECH NFC_PROTO_ISO15693_MASK /* Delay between each poll frame (ms) */ #define DIGITAL_POLL_INTERVAL 10 struct digital_cmd { struct list_head queue; u8 type; u8 pending; u16 timeout; struct sk_buff *req; struct sk_buff *resp; struct digital_tg_mdaa_params *mdaa_params; nfc_digital_cmd_complete_t cmd_cb; void *cb_context; }; struct sk_buff *digital_skb_alloc(struct nfc_digital_dev *ddev, unsigned int len) { struct sk_buff *skb; skb = alloc_skb(len + ddev->tx_headroom + ddev->tx_tailroom, GFP_KERNEL); if (skb) skb_reserve(skb, ddev->tx_headroom); return skb; } void digital_skb_add_crc(struct sk_buff *skb, crc_func_t crc_func, u16 init, u8 bitwise_inv, u8 msb_first) { u16 crc; crc = crc_func(init, skb->data, skb->len); if (bitwise_inv) crc = ~crc; if (msb_first) crc = __fswab16(crc); skb_put_u8(skb, crc & 0xFF); skb_put_u8(skb, (crc >> 8) & 0xFF); } int digital_skb_check_crc(struct sk_buff *skb, crc_func_t crc_func, u16 crc_init, u8 bitwise_inv, u8 msb_first) { int rc; u16 crc; if (skb->len <= 2) return -EIO; crc = crc_func(crc_init, skb->data, skb->len - 2); if (bitwise_inv) crc = ~crc; if (msb_first) crc = __swab16(crc); rc = (skb->data[skb->len - 2] - (crc & 0xFF)) + (skb->data[skb->len - 1] - ((crc >> 8) & 0xFF)); if (rc) return -EIO; skb_trim(skb, skb->len - 2); return 0; } static inline void digital_switch_rf(struct nfc_digital_dev *ddev, bool on) { ddev->ops->switch_rf(ddev, on); } static inline void digital_abort_cmd(struct nfc_digital_dev *ddev) { ddev->ops->abort_cmd(ddev); } static void digital_wq_cmd_complete(struct work_struct *work) { struct digital_cmd *cmd; struct nfc_digital_dev *ddev = container_of(work, struct nfc_digital_dev, cmd_complete_work); mutex_lock(&ddev->cmd_lock); cmd = list_first_entry_or_null(&ddev->cmd_queue, struct digital_cmd, queue); if (!cmd) { mutex_unlock(&ddev->cmd_lock); return; } list_del(&cmd->queue); mutex_unlock(&ddev->cmd_lock); if (!IS_ERR(cmd->resp)) print_hex_dump_debug("DIGITAL RX: ", DUMP_PREFIX_NONE, 16, 1, cmd->resp->data, cmd->resp->len, false); cmd->cmd_cb(ddev, cmd->cb_context, cmd->resp); kfree(cmd->mdaa_params); kfree(cmd); schedule_work(&ddev->cmd_work); } static void digital_send_cmd_complete(struct nfc_digital_dev *ddev, void *arg, struct sk_buff *resp) { struct digital_cmd *cmd = arg; cmd->resp = resp; schedule_work(&ddev->cmd_complete_work); } static void digital_wq_cmd(struct work_struct *work) { int rc; struct digital_cmd *cmd; struct digital_tg_mdaa_params *params; struct nfc_digital_dev *ddev = container_of(work, struct nfc_digital_dev, cmd_work); mutex_lock(&ddev->cmd_lock); cmd = list_first_entry_or_null(&ddev->cmd_queue, struct digital_cmd, queue); if (!cmd || cmd->pending) { mutex_unlock(&ddev->cmd_lock); return; } cmd->pending = 1; mutex_unlock(&ddev->cmd_lock); if (cmd->req) print_hex_dump_debug("DIGITAL TX: ", DUMP_PREFIX_NONE, 16, 1, cmd->req->data, cmd->req->len, false); switch (cmd->type) { case DIGITAL_CMD_IN_SEND: rc = ddev->ops->in_send_cmd(ddev, cmd->req, cmd->timeout, digital_send_cmd_complete, cmd); break; case DIGITAL_CMD_TG_SEND: rc = ddev->ops->tg_send_cmd(ddev, cmd->req, cmd->timeout, 
digital_send_cmd_complete, cmd); break; case DIGITAL_CMD_TG_LISTEN: rc = ddev->ops->tg_listen(ddev, cmd->timeout, digital_send_cmd_complete, cmd); break; case DIGITAL_CMD_TG_LISTEN_MDAA: params = cmd->mdaa_params; rc = ddev->ops->tg_listen_mdaa(ddev, params, cmd->timeout, digital_send_cmd_complete, cmd); break; case DIGITAL_CMD_TG_LISTEN_MD: rc = ddev->ops->tg_listen_md(ddev, cmd->timeout, digital_send_cmd_complete, cmd); break; default: pr_err("Unknown cmd type %d\n", cmd->type); return; } if (!rc) return; pr_err("in_send_command returned err %d\n", rc); mutex_lock(&ddev->cmd_lock); list_del(&cmd->queue); mutex_unlock(&ddev->cmd_lock); kfree_skb(cmd->req); kfree(cmd->mdaa_params); kfree(cmd); schedule_work(&ddev->cmd_work); } int digital_send_cmd(struct nfc_digital_dev *ddev, u8 cmd_type, struct sk_buff *skb, struct digital_tg_mdaa_params *params, u16 timeout, nfc_digital_cmd_complete_t cmd_cb, void *cb_context) { struct digital_cmd *cmd; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->type = cmd_type; cmd->timeout = timeout; cmd->req = skb; cmd->mdaa_params = params; cmd->cmd_cb = cmd_cb; cmd->cb_context = cb_context; INIT_LIST_HEAD(&cmd->queue); mutex_lock(&ddev->cmd_lock); list_add_tail(&cmd->queue, &ddev->cmd_queue); mutex_unlock(&ddev->cmd_lock); schedule_work(&ddev->cmd_work); return 0; } int digital_in_configure_hw(struct nfc_digital_dev *ddev, int type, int param) { int rc; rc = ddev->ops->in_configure_hw(ddev, type, param); if (rc) pr_err("in_configure_hw failed: %d\n", rc); return rc; } int digital_tg_configure_hw(struct nfc_digital_dev *ddev, int type, int param) { int rc; rc = ddev->ops->tg_configure_hw(ddev, type, param); if (rc) pr_err("tg_configure_hw failed: %d\n", rc); return rc; } static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech) { struct digital_tg_mdaa_params *params; int rc; params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; params->sens_res = DIGITAL_SENS_RES_NFC_DEP; get_random_bytes(params->nfcid1, sizeof(params->nfcid1)); params->sel_res = DIGITAL_SEL_RES_NFC_DEP; params->nfcid2[0] = DIGITAL_SENSF_NFCID2_NFC_DEP_B1; params->nfcid2[1] = DIGITAL_SENSF_NFCID2_NFC_DEP_B2; get_random_bytes(params->nfcid2 + 2, NFC_NFCID2_MAXSIZE - 2); params->sc = DIGITAL_SENSF_FELICA_SC; rc = digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params, 500, digital_tg_recv_atr_req, NULL); if (rc) kfree(params); return rc; } static int digital_tg_listen_md(struct nfc_digital_dev *ddev, u8 rf_tech) { return digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MD, NULL, NULL, 500, digital_tg_recv_md_req, NULL); } int digital_target_found(struct nfc_digital_dev *ddev, struct nfc_target *target, u8 protocol) { int rc; u8 framing; u8 rf_tech; u8 poll_tech_count; int (*check_crc)(struct sk_buff *skb); void (*add_crc)(struct sk_buff *skb); rf_tech = ddev->poll_techs[ddev->poll_tech_index].rf_tech; switch (protocol) { case NFC_PROTO_JEWEL: framing = NFC_DIGITAL_FRAMING_NFCA_T1T; check_crc = digital_skb_check_crc_b; add_crc = digital_skb_add_crc_b; break; case NFC_PROTO_MIFARE: framing = NFC_DIGITAL_FRAMING_NFCA_T2T; check_crc = digital_skb_check_crc_a; add_crc = digital_skb_add_crc_a; break; case NFC_PROTO_FELICA: framing = NFC_DIGITAL_FRAMING_NFCF_T3T; check_crc = digital_skb_check_crc_f; add_crc = digital_skb_add_crc_f; break; case NFC_PROTO_NFC_DEP: if (rf_tech == NFC_DIGITAL_RF_TECH_106A) { framing = NFC_DIGITAL_FRAMING_NFCA_NFC_DEP; check_crc = digital_skb_check_crc_a; add_crc = digital_skb_add_crc_a; } else { framing = 
NFC_DIGITAL_FRAMING_NFCF_NFC_DEP; check_crc = digital_skb_check_crc_f; add_crc = digital_skb_add_crc_f; } break; case NFC_PROTO_ISO15693: framing = NFC_DIGITAL_FRAMING_ISO15693_T5T; check_crc = digital_skb_check_crc_b; add_crc = digital_skb_add_crc_b; break; case NFC_PROTO_ISO14443: framing = NFC_DIGITAL_FRAMING_NFCA_T4T; check_crc = digital_skb_check_crc_a; add_crc = digital_skb_add_crc_a; break; case NFC_PROTO_ISO14443_B: framing = NFC_DIGITAL_FRAMING_NFCB_T4T; check_crc = digital_skb_check_crc_b; add_crc = digital_skb_add_crc_b; break; default: pr_err("Invalid protocol %d\n", protocol); return -EINVAL; } pr_debug("rf_tech=%d, protocol=%d\n", rf_tech, protocol); ddev->curr_rf_tech = rf_tech; if (DIGITAL_DRV_CAPS_IN_CRC(ddev)) { ddev->skb_add_crc = digital_skb_add_crc_none; ddev->skb_check_crc = digital_skb_check_crc_none; } else { ddev->skb_add_crc = add_crc; ddev->skb_check_crc = check_crc; } rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, framing); if (rc) return rc; target->supported_protocols = (1 << protocol); poll_tech_count = ddev->poll_tech_count; ddev->poll_tech_count = 0; rc = nfc_targets_found(ddev->nfc_dev, target, 1); if (rc) { ddev->poll_tech_count = poll_tech_count; return rc; } return 0; } void digital_poll_next_tech(struct nfc_digital_dev *ddev) { u8 rand_mod; digital_switch_rf(ddev, 0); mutex_lock(&ddev->poll_lock); if (!ddev->poll_tech_count) { mutex_unlock(&ddev->poll_lock); return; } get_random_bytes(&rand_mod, sizeof(rand_mod)); ddev->poll_tech_index = rand_mod % ddev->poll_tech_count; mutex_unlock(&ddev->poll_lock); schedule_delayed_work(&ddev->poll_work, msecs_to_jiffies(DIGITAL_POLL_INTERVAL)); } static void digital_wq_poll(struct work_struct *work) { int rc; struct digital_poll_tech *poll_tech; struct nfc_digital_dev *ddev = container_of(work, struct nfc_digital_dev, poll_work.work); mutex_lock(&ddev->poll_lock); if (!ddev->poll_tech_count) { mutex_unlock(&ddev->poll_lock); return; } poll_tech = &ddev->poll_techs[ddev->poll_tech_index]; mutex_unlock(&ddev->poll_lock); rc = poll_tech->poll_func(ddev, poll_tech->rf_tech); if (rc) digital_poll_next_tech(ddev); } static void digital_add_poll_tech(struct nfc_digital_dev *ddev, u8 rf_tech, digital_poll_t poll_func) { struct digital_poll_tech *poll_tech; if (ddev->poll_tech_count >= NFC_DIGITAL_POLL_MODE_COUNT_MAX) return; poll_tech = &ddev->poll_techs[ddev->poll_tech_count++]; poll_tech->rf_tech = rf_tech; poll_tech->poll_func = poll_func; } /** * digital_start_poll - start_poll operation * @nfc_dev: device to be polled * @im_protocols: bitset of nfc initiator protocols to be used for polling * @tm_protocols: bitset of nfc transport protocols to be used for polling * * For every supported protocol, the corresponding polling function is added * to the table of polling technologies (ddev->poll_techs[]) using * digital_add_poll_tech(). * When a polling function fails (by timeout or protocol error) the next one is * schedule by digital_poll_next_tech() on the poll workqueue (ddev->poll_work). 
*/ static int digital_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols, __u32 tm_protocols) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); u32 matching_im_protocols, matching_tm_protocols; pr_debug("protocols: im 0x%x, tm 0x%x, supported 0x%x\n", im_protocols, tm_protocols, ddev->protocols); matching_im_protocols = ddev->protocols & im_protocols; matching_tm_protocols = ddev->protocols & tm_protocols; if (!matching_im_protocols && !matching_tm_protocols) { pr_err("Unknown protocol\n"); return -EINVAL; } if (ddev->poll_tech_count) { pr_err("Already polling\n"); return -EBUSY; } if (ddev->curr_protocol) { pr_err("A target is already active\n"); return -EBUSY; } ddev->poll_tech_count = 0; ddev->poll_tech_index = 0; if (matching_im_protocols & DIGITAL_PROTO_NFCA_RF_TECH) digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A, digital_in_send_sens_req); if (matching_im_protocols & DIGITAL_PROTO_NFCB_RF_TECH) digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106B, digital_in_send_sensb_req); if (matching_im_protocols & DIGITAL_PROTO_NFCF_RF_TECH) { digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_212F, digital_in_send_sensf_req); digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_424F, digital_in_send_sensf_req); } if (matching_im_protocols & DIGITAL_PROTO_ISO15693_RF_TECH) digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_ISO15693, digital_in_send_iso15693_inv_req); if (matching_tm_protocols & NFC_PROTO_NFC_DEP_MASK) { if (ddev->ops->tg_listen_mdaa) { digital_add_poll_tech(ddev, 0, digital_tg_listen_mdaa); } else if (ddev->ops->tg_listen_md) { digital_add_poll_tech(ddev, 0, digital_tg_listen_md); } else { digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A, digital_tg_listen_nfca); digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_212F, digital_tg_listen_nfcf); digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_424F, digital_tg_listen_nfcf); } } if (!ddev->poll_tech_count) { pr_err("Unsupported protocols: im=0x%x, tm=0x%x\n", matching_im_protocols, matching_tm_protocols); return -EINVAL; } schedule_delayed_work(&ddev->poll_work, 0); return 0; } static void digital_stop_poll(struct nfc_dev *nfc_dev) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); mutex_lock(&ddev->poll_lock); if (!ddev->poll_tech_count) { pr_err("Polling operation was not running\n"); mutex_unlock(&ddev->poll_lock); return; } ddev->poll_tech_count = 0; mutex_unlock(&ddev->poll_lock); cancel_delayed_work_sync(&ddev->poll_work); digital_abort_cmd(ddev); } static int digital_dev_up(struct nfc_dev *nfc_dev) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); digital_switch_rf(ddev, 1); return 0; } static int digital_dev_down(struct nfc_dev *nfc_dev) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); digital_switch_rf(ddev, 0); return 0; } static int digital_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, __u8 comm_mode, __u8 *gb, size_t gb_len) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); int rc; rc = digital_in_send_atr_req(ddev, target, comm_mode, gb, gb_len); if (!rc) ddev->curr_protocol = NFC_PROTO_NFC_DEP; return rc; } static int digital_dep_link_down(struct nfc_dev *nfc_dev) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); digital_abort_cmd(ddev); ddev->curr_protocol = 0; return 0; } static int digital_activate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, __u32 protocol) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); if (ddev->poll_tech_count) { pr_err("Can't activate a target while polling\n"); return -EBUSY; } if 
(ddev->curr_protocol) { pr_err("A target is already active\n"); return -EBUSY; } ddev->curr_protocol = protocol; return 0; } static void digital_deactivate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, u8 mode) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); if (!ddev->curr_protocol) { pr_err("No active target\n"); return; } digital_abort_cmd(ddev); ddev->curr_protocol = 0; } static int digital_tg_send(struct nfc_dev *dev, struct sk_buff *skb) { struct nfc_digital_dev *ddev = nfc_get_drvdata(dev); return digital_tg_send_dep_res(ddev, skb); } static void digital_in_send_complete(struct nfc_digital_dev *ddev, void *arg, struct sk_buff *resp) { struct digital_data_exch *data_exch = arg; int rc; if (IS_ERR(resp)) { rc = PTR_ERR(resp); resp = NULL; goto done; } if (ddev->curr_protocol == NFC_PROTO_MIFARE) { rc = digital_in_recv_mifare_res(resp); /* crc check is done in digital_in_recv_mifare_res() */ goto done; } if ((ddev->curr_protocol == NFC_PROTO_ISO14443) || (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) { rc = digital_in_iso_dep_pull_sod(ddev, resp); if (rc) goto done; } rc = ddev->skb_check_crc(resp); done: if (rc) { kfree_skb(resp); resp = NULL; } data_exch->cb(data_exch->cb_context, resp, rc); kfree(data_exch); } static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); struct digital_data_exch *data_exch; int rc; data_exch = kzalloc(sizeof(*data_exch), GFP_KERNEL); if (!data_exch) return -ENOMEM; data_exch->cb = cb; data_exch->cb_context = cb_context; if (ddev->curr_protocol == NFC_PROTO_NFC_DEP) { rc = digital_in_send_dep_req(ddev, target, skb, data_exch); goto exit; } if ((ddev->curr_protocol == NFC_PROTO_ISO14443) || (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) { rc = digital_in_iso_dep_push_sod(ddev, skb); if (rc) goto exit; } ddev->skb_add_crc(skb); rc = digital_in_send_cmd(ddev, skb, 500, digital_in_send_complete, data_exch); exit: if (rc) kfree(data_exch); return rc; } static const struct nfc_ops digital_nfc_ops = { .dev_up = digital_dev_up, .dev_down = digital_dev_down, .start_poll = digital_start_poll, .stop_poll = digital_stop_poll, .dep_link_up = digital_dep_link_up, .dep_link_down = digital_dep_link_down, .activate_target = digital_activate_target, .deactivate_target = digital_deactivate_target, .tm_send = digital_tg_send, .im_transceive = digital_in_send, }; struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops, __u32 supported_protocols, __u32 driver_capabilities, int tx_headroom, int tx_tailroom) { struct nfc_digital_dev *ddev; if (!ops->in_configure_hw || !ops->in_send_cmd || !ops->tg_listen || !ops->tg_configure_hw || !ops->tg_send_cmd || !ops->abort_cmd || !ops->switch_rf || (ops->tg_listen_md && !ops->tg_get_rf_tech)) return NULL; ddev = kzalloc(sizeof(*ddev), GFP_KERNEL); if (!ddev) return NULL; ddev->driver_capabilities = driver_capabilities; ddev->ops = ops; mutex_init(&ddev->cmd_lock); INIT_LIST_HEAD(&ddev->cmd_queue); INIT_WORK(&ddev->cmd_work, digital_wq_cmd); INIT_WORK(&ddev->cmd_complete_work, digital_wq_cmd_complete); mutex_init(&ddev->poll_lock); INIT_DELAYED_WORK(&ddev->poll_work, digital_wq_poll); if (supported_protocols & NFC_PROTO_JEWEL_MASK) ddev->protocols |= NFC_PROTO_JEWEL_MASK; if (supported_protocols & NFC_PROTO_MIFARE_MASK) ddev->protocols |= NFC_PROTO_MIFARE_MASK; if (supported_protocols & NFC_PROTO_FELICA_MASK) ddev->protocols |= 
NFC_PROTO_FELICA_MASK; if (supported_protocols & NFC_PROTO_NFC_DEP_MASK) ddev->protocols |= NFC_PROTO_NFC_DEP_MASK; if (supported_protocols & NFC_PROTO_ISO15693_MASK) ddev->protocols |= NFC_PROTO_ISO15693_MASK; if (supported_protocols & NFC_PROTO_ISO14443_MASK) ddev->protocols |= NFC_PROTO_ISO14443_MASK; if (supported_protocols & NFC_PROTO_ISO14443_B_MASK) ddev->protocols |= NFC_PROTO_ISO14443_B_MASK; ddev->tx_headroom = tx_headroom + DIGITAL_MAX_HEADER_LEN; ddev->tx_tailroom = tx_tailroom + DIGITAL_CRC_LEN; ddev->nfc_dev = nfc_allocate_device(&digital_nfc_ops, ddev->protocols, ddev->tx_headroom, ddev->tx_tailroom); if (!ddev->nfc_dev) { pr_err("nfc_allocate_device failed\n"); goto free_dev; } nfc_set_drvdata(ddev->nfc_dev, ddev); return ddev; free_dev: kfree(ddev); return NULL; } EXPORT_SYMBOL(nfc_digital_allocate_device); void nfc_digital_free_device(struct nfc_digital_dev *ddev) { nfc_free_device(ddev->nfc_dev); kfree(ddev); } EXPORT_SYMBOL(nfc_digital_free_device); int nfc_digital_register_device(struct nfc_digital_dev *ddev) { return nfc_register_device(ddev->nfc_dev); } EXPORT_SYMBOL(nfc_digital_register_device); void nfc_digital_unregister_device(struct nfc_digital_dev *ddev) { struct digital_cmd *cmd, *n; nfc_unregister_device(ddev->nfc_dev); mutex_lock(&ddev->poll_lock); ddev->poll_tech_count = 0; mutex_unlock(&ddev->poll_lock); cancel_delayed_work_sync(&ddev->poll_work); cancel_work_sync(&ddev->cmd_work); cancel_work_sync(&ddev->cmd_complete_work); list_for_each_entry_safe(cmd, n, &ddev->cmd_queue, queue) { list_del(&cmd->queue); /* Call the command callback if any and pass it a ENODEV error. * This gives a chance to the command issuer to free any * allocated buffer. */ if (cmd->cmd_cb) cmd->cmd_cb(ddev, cmd->cb_context, ERR_PTR(-ENODEV)); kfree(cmd->mdaa_params); kfree(cmd); } } EXPORT_SYMBOL(nfc_digital_unregister_device); MODULE_DESCRIPTION("NFC Digital protocol stack"); MODULE_LICENSE("GPL"); |
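nfc_digital_allocate_device() refuses to create a device unless the mandatory ops are present (in/tg_configure_hw, in/tg_send_cmd, tg_listen, abort_cmd, switch_rf). The skeleton below sketches a driver-side registration; all my_* callbacks are illustrative stubs whose signatures are inferred from the call sites in digital_wq_cmd() and the configure/switch helpers above, and the protocol mask and room values are arbitrary examples.

```c
/* Skeleton of a digital NFC driver registration. All my_* callbacks are
 * illustrative stubs; signatures follow how digital_core.c invokes the ops.
 */
static int my_configure_hw(struct nfc_digital_dev *ddev, int type, int param)
{ return 0; }

static int my_in_send_cmd(struct nfc_digital_dev *ddev, struct sk_buff *skb,
			  u16 timeout, nfc_digital_cmd_complete_t cb, void *arg)
{ return -EOPNOTSUPP; }

static int my_tg_send_cmd(struct nfc_digital_dev *ddev, struct sk_buff *skb,
			  u16 timeout, nfc_digital_cmd_complete_t cb, void *arg)
{ return -EOPNOTSUPP; }

static int my_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
			nfc_digital_cmd_complete_t cb, void *arg)
{ return -EOPNOTSUPP; }

static void my_switch_rf(struct nfc_digital_dev *ddev, bool on) { }
static void my_abort_cmd(struct nfc_digital_dev *ddev) { }

static const struct nfc_digital_ops my_digital_ops = {
	.in_configure_hw = my_configure_hw,
	.in_send_cmd	 = my_in_send_cmd,
	.tg_configure_hw = my_configure_hw,
	.tg_send_cmd	 = my_tg_send_cmd,
	.tg_listen	 = my_tg_listen,
	.switch_rf	 = my_switch_rf,
	.abort_cmd	 = my_abort_cmd,
};

static int example_register(struct nfc_digital_dev **out)
{
	struct nfc_digital_dev *ddev;
	int rc;

	ddev = nfc_digital_allocate_device(&my_digital_ops,
					   NFC_PROTO_MIFARE_MASK |
					   NFC_PROTO_ISO14443_MASK,
					   0 /* driver capabilities */,
					   0 /* tx headroom */,
					   0 /* tx tailroom */);
	if (!ddev)
		return -ENOMEM;

	rc = nfc_digital_register_device(ddev);
	if (rc) {
		nfc_digital_free_device(ddev);
		return rc;
	}
	*out = ddev;
	return 0;
}
```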
| 4 4 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 | /************************************************************************** * * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX. USA. * Copyright 2016 Intel Corporation * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * **************************************************************************/ /* * Authors: * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> */ #ifndef _DRM_MM_H_ #define _DRM_MM_H_ /* * Generic range manager structs */ #include <linux/bug.h> #include <linux/rbtree.h> #include <linux/limits.h> #include <linux/mm_types.h> #include <linux/list.h> #include <linux/spinlock.h> #ifdef CONFIG_DRM_DEBUG_MM #include <linux/stackdepot.h> #endif #include <linux/types.h> #include <drm/drm_print.h> #ifdef CONFIG_DRM_DEBUG_MM #define DRM_MM_BUG_ON(expr) BUG_ON(expr) #else #define DRM_MM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #endif /** * enum drm_mm_insert_mode - control search and allocation behaviour * * The &struct drm_mm range manager supports finding a suitable hole using * a number of search trees. These trees are organised by size, by address and * in most recent eviction order. This allows the user to find either the * smallest hole to reuse, the lowest or highest address to reuse, or simply * reuse the most recent eviction that fits. When allocating the &drm_mm_node * from within the hole, the &drm_mm_insert_mode also dictates whether to * allocate the lowest matching address or the highest. */ enum drm_mm_insert_mode { /** * @DRM_MM_INSERT_BEST: * * Search for the smallest hole (within the search range) that fits * the desired node. * * Allocates the node from the bottom of the found hole. */ DRM_MM_INSERT_BEST = 0, /** * @DRM_MM_INSERT_LOW: * * Search for the lowest hole (address closest to 0, within the search * range) that fits the desired node. * * Allocates the node from the bottom of the found hole. */ DRM_MM_INSERT_LOW, /** * @DRM_MM_INSERT_HIGH: * * Search for the highest hole (address closest to U64_MAX, within the * search range) that fits the desired node. * * Allocates the node from the *top* of the found hole. The specified * alignment for the node is applied to the base of the node * (&drm_mm_node.start). */ DRM_MM_INSERT_HIGH, /** * @DRM_MM_INSERT_EVICT: * * Search for the most recently evicted hole (within the search range) * that fits the desired node. This is appropriate for use immediately * after performing an eviction scan (see drm_mm_scan_init()) and * removing the selected nodes to form a hole. * * Allocates the node from the bottom of the found hole. */ DRM_MM_INSERT_EVICT, /** * @DRM_MM_INSERT_ONCE: * * Only check the first hole for suitability and report -ENOSPC * immediately otherwise, rather than check every hole until a * suitable one is found. Can only be used in conjunction with another * search method such as DRM_MM_INSERT_HIGH or DRM_MM_INSERT_LOW. */ DRM_MM_INSERT_ONCE = BIT(31), /** * @DRM_MM_INSERT_HIGHEST: * * Only check the highest hole (the hole with the largest address) and * insert the node at the top of the hole or report -ENOSPC if * unsuitable. * * Does not search all holes. */ DRM_MM_INSERT_HIGHEST = DRM_MM_INSERT_HIGH | DRM_MM_INSERT_ONCE, /** * @DRM_MM_INSERT_LOWEST: * * Only check the lowest hole (the hole with the smallest address) and * insert the node at the bottom of the hole or report -ENOSPC if * unsuitable. * * Does not search all holes. */ DRM_MM_INSERT_LOWEST = DRM_MM_INSERT_LOW | DRM_MM_INSERT_ONCE, }; /** * struct drm_mm_node - allocated block in the DRM allocator * * This represents an allocated block in a &drm_mm allocator. Except for * pre-reserved nodes inserted using drm_mm_reserve_node() the structure is * entirely opaque and should only be accessed through the provided functions.
* Since allocation of these nodes is entirely handled by the driver they can be * embedded. */ struct drm_mm_node { /** @color: Opaque driver-private tag. */ unsigned long color; /** @start: Start address of the allocated block. */ u64 start; /** @size: Size of the allocated block. */ u64 size; /* private: */ struct drm_mm *mm; struct list_head node_list; struct list_head hole_stack; struct rb_node rb; struct rb_node rb_hole_size; struct rb_node rb_hole_addr; u64 __subtree_last; u64 hole_size; u64 subtree_max_hole; unsigned long flags; #define DRM_MM_NODE_ALLOCATED_BIT 0 #define DRM_MM_NODE_SCANNED_BIT 1 #ifdef CONFIG_DRM_DEBUG_MM depot_stack_handle_t stack; #endif }; /** * struct drm_mm - DRM allocator * * DRM range allocator with a few special functions and features geared towards * managing GPU memory. Except for the @color_adjust callback the structure is * entirely opaque and should only be accessed through the provided functions * and macros. This structure can be embedded into larger driver structures. */ struct drm_mm { /** * @color_adjust: * * Optional driver callback to further apply restrictions on a hole. The * node argument points at the node containing the hole from which the * block would be allocated (see drm_mm_hole_follows() and friends). The * other arguments are the size of the block to be allocated. The driver * can adjust the start and end as needed to e.g. insert guard pages. */ void (*color_adjust)(const struct drm_mm_node *node, unsigned long color, u64 *start, u64 *end); /* private: */ /* List of all memory nodes that immediately precede a free hole. */ struct list_head hole_stack; /* head_node.node_list is the list of all memory nodes, ordered * according to the (increasing) start address of the memory node. */ struct drm_mm_node head_node; /* Keep an interval_tree for fast lookup of drm_mm_nodes by address. */ struct rb_root_cached interval_tree; struct rb_root_cached holes_size; struct rb_root holes_addr; unsigned long scan_active; }; /** * struct drm_mm_scan - DRM allocator eviction roster data * * This structure tracks data needed for the eviction roster set up using * drm_mm_scan_init(), and used with drm_mm_scan_add_block() and * drm_mm_scan_remove_block(). The structure is entirely opaque and should only * be accessed through the provided functions and macros. It is meant to be * allocated temporarily by the driver on the stack. */ struct drm_mm_scan { /* private: */ struct drm_mm *mm; u64 size; u64 alignment; u64 remainder_mask; u64 range_start; u64 range_end; u64 hit_start; u64 hit_end; unsigned long color; enum drm_mm_insert_mode mode; }; /** * drm_mm_node_allocated - checks whether a node is allocated * @node: drm_mm_node to check * * Drivers are required to clear a node prior to using it with the * drm_mm range manager. * * Drivers should use this helper for proper encapsulation of drm_mm * internals. * * Returns: * True if the @node is allocated. */ static inline bool drm_mm_node_allocated(const struct drm_mm_node *node) { return test_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags); } /** * drm_mm_initialized - checks whether an allocator is initialized * @mm: drm_mm to check * * Drivers should clear the struct drm_mm prior to initialisation if they * want to use this function. * * Drivers should use this helper for proper encapsulation of drm_mm * internals. * * Returns: * True if the @mm is initialized.
*/ static inline bool drm_mm_initialized(const struct drm_mm *mm) { return READ_ONCE(mm->hole_stack.next); } /** * drm_mm_hole_follows - checks whether a hole follows this node * @node: drm_mm_node to check * * Holes are embedded into the drm_mm using the tail of a drm_mm_node. * If you wish to know whether a hole follows this particular node, * query this function. See also drm_mm_hole_node_start() and * drm_mm_hole_node_end(). * * Returns: * True if a hole follows the @node. */ static inline bool drm_mm_hole_follows(const struct drm_mm_node *node) { return node->hole_size; } static inline u64 __drm_mm_hole_node_start(const struct drm_mm_node *hole_node) { return hole_node->start + hole_node->size; } /** * drm_mm_hole_node_start - computes the start of the hole following @node * @hole_node: drm_mm_node which implicitly tracks the following hole * * This is useful for driver-specific debug dumpers. Otherwise drivers should * not inspect holes themselves. Drivers must check first whether a hole indeed * follows by looking at drm_mm_hole_follows(). * * Returns: * Start of the subsequent hole. */ static inline u64 drm_mm_hole_node_start(const struct drm_mm_node *hole_node) { DRM_MM_BUG_ON(!drm_mm_hole_follows(hole_node)); return __drm_mm_hole_node_start(hole_node); } static inline u64 __drm_mm_hole_node_end(const struct drm_mm_node *hole_node) { return list_next_entry(hole_node, node_list)->start; } /** * drm_mm_hole_node_end - computes the end of the hole following @node * @hole_node: drm_mm_node which implicitly tracks the following hole * * This is useful for driver-specific debug dumpers. Otherwise drivers should * not inspect holes themselves. Drivers must check first whether a hole indeed * follows by looking at drm_mm_hole_follows(). * * Returns: * End of the subsequent hole. */ static inline u64 drm_mm_hole_node_end(const struct drm_mm_node *hole_node) { return __drm_mm_hole_node_end(hole_node); } /** * drm_mm_nodes - list of nodes under the drm_mm range manager * @mm: the struct drm_mm range manager * * As the drm_mm range manager hides its node_list deep within its * structure, extracting it looks painful and repetitive. This is * not expected to be used outside of the drm_mm_for_each_node() * macros and similar internal functions. * * Returns: * The node list, may be empty. */ #define drm_mm_nodes(mm) (&(mm)->head_node.node_list) /** * drm_mm_for_each_node - iterator to walk over all allocated nodes * @entry: &struct drm_mm_node to assign to in each iteration step * @mm: &drm_mm allocator to walk * * This iterator walks over all nodes in the range allocator. It is implemented * with list_for_each(), so not safe against removal of elements. */ #define drm_mm_for_each_node(entry, mm) \ list_for_each_entry(entry, drm_mm_nodes(mm), node_list) /** * drm_mm_for_each_node_safe - iterator to walk over all allocated nodes * @entry: &struct drm_mm_node to assign to in each iteration step * @next: &struct drm_mm_node to store the next step * @mm: &drm_mm allocator to walk * * This iterator walks over all nodes in the range allocator. It is implemented * with list_for_each_safe(), so safe against removal of elements.
*/ #define drm_mm_for_each_node_safe(entry, next, mm) \ list_for_each_entry_safe(entry, next, drm_mm_nodes(mm), node_list) /** * drm_mm_for_each_hole - iterator to walk over all holes * @pos: &drm_mm_node used internally to track progress * @mm: &drm_mm allocator to walk * @hole_start: ulong variable to assign the hole start to on each iteration * @hole_end: ulong variable to assign the hole end to on each iteration * * This iterator walks over all holes in the range allocator. It is implemented * with list_for_each(), so not safe against removal of elements. @pos is used * internally and will not reflect a real drm_mm_node for the very first hole. * Hence users of this iterator may not access it. * * Implementation Note: * We need to inline list_for_each_entry in order to be able to set hole_start * and hole_end on each iteration while keeping the macro sane. */ #define drm_mm_for_each_hole(pos, mm, hole_start, hole_end) \ for (pos = list_first_entry(&(mm)->hole_stack, \ typeof(*pos), hole_stack); \ &pos->hole_stack != &(mm)->hole_stack ? \ hole_start = drm_mm_hole_node_start(pos), \ hole_end = hole_start + pos->hole_size, \ 1 : 0; \ pos = list_next_entry(pos, hole_stack)) /* * Basic range manager support (drm_mm.c) */ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node); int drm_mm_insert_node_in_range(struct drm_mm *mm, struct drm_mm_node *node, u64 size, u64 alignment, unsigned long color, u64 start, u64 end, enum drm_mm_insert_mode mode); /** * drm_mm_insert_node_generic - search for space and insert @node * @mm: drm_mm to allocate from * @node: preallocated node to insert * @size: size of the allocation * @alignment: alignment of the allocation * @color: opaque tag value to use for this node * @mode: fine-tune the allocation search and placement * * This is a simplified version of drm_mm_insert_node_in_range() with no * range restrictions applied. * * The preallocated node must be cleared to 0. * * Returns: * 0 on success, -ENOSPC if there's no suitable hole. */ static inline int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node, u64 size, u64 alignment, unsigned long color, enum drm_mm_insert_mode mode) { return drm_mm_insert_node_in_range(mm, node, size, alignment, color, 0, U64_MAX, mode); } /** * drm_mm_insert_node - search for space and insert @node * @mm: drm_mm to allocate from * @node: preallocated node to insert * @size: size of the allocation * * This is a simplified version of drm_mm_insert_node_generic() with @color set * to 0. * * The preallocated node must be cleared to 0. * * Returns: * 0 on success, -ENOSPC if there's no suitable hole. */ static inline int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node, u64 size) { return drm_mm_insert_node_generic(mm, node, size, 0, 0, 0); } void drm_mm_remove_node(struct drm_mm_node *node); void drm_mm_init(struct drm_mm *mm, u64 start, u64 size); void drm_mm_takedown(struct drm_mm *mm); /** * drm_mm_clean - checks whether an allocator is clean * @mm: drm_mm allocator to check * * Returns: * True if the allocator is completely free, false if there's still a node * allocated in it.
*/ static inline bool drm_mm_clean(const struct drm_mm *mm) { return list_empty(drm_mm_nodes(mm)); } struct drm_mm_node * __drm_mm_interval_first(const struct drm_mm *mm, u64 start, u64 last); /** * drm_mm_for_each_node_in_range - iterator to walk over a range of * allocated nodes * @node__: drm_mm_node structure to assign to in each iteration step * @mm__: drm_mm allocator to walk * @start__: starting offset, the first node will overlap this * @end__: ending offset, the last node will start before this (but may overlap) * * This iterator walks over all nodes in the range allocator that lie * between @start and @end. It is implemented similarly to list_for_each(), * but using the internal interval tree to accelerate the search for the * starting node, and so not safe against removal of elements. It assumes * that @end is within (or is the upper limit of) the drm_mm allocator. * If [@start, @end] are beyond the range of the drm_mm, the iterator may walk * over the special _unallocated_ &drm_mm.head_node, and may even continue * indefinitely. */ #define drm_mm_for_each_node_in_range(node__, mm__, start__, end__) \ for (node__ = __drm_mm_interval_first((mm__), (start__), (end__)-1); \ node__->start < (end__); \ node__ = list_next_entry(node__, node_list)) void drm_mm_scan_init_with_range(struct drm_mm_scan *scan, struct drm_mm *mm, u64 size, u64 alignment, unsigned long color, u64 start, u64 end, enum drm_mm_insert_mode mode); /** * drm_mm_scan_init - initialize lru scanning * @scan: scan state * @mm: drm_mm to scan * @size: size of the allocation * @alignment: alignment of the allocation * @color: opaque tag value to use for the allocation * @mode: fine-tune the allocation search and placement * * This is a simplified version of drm_mm_scan_init_with_range() with no range * restrictions applied. * * This simply sets up the scanning routines with the parameters for the desired * hole. * * Warning: * As long as the scan list is non-empty, no other operations than * adding/removing nodes to/from the scan list are allowed. */ static inline void drm_mm_scan_init(struct drm_mm_scan *scan, struct drm_mm *mm, u64 size, u64 alignment, unsigned long color, enum drm_mm_insert_mode mode) { drm_mm_scan_init_with_range(scan, mm, size, alignment, color, 0, U64_MAX, mode); } bool drm_mm_scan_add_block(struct drm_mm_scan *scan, struct drm_mm_node *node); bool drm_mm_scan_remove_block(struct drm_mm_scan *scan, struct drm_mm_node *node); struct drm_mm_node *drm_mm_scan_color_evict(struct drm_mm_scan *scan); void drm_mm_print(const struct drm_mm *mm, struct drm_printer *p); #endif
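Since drm_mm.h is pure interface, a short usage sketch may help tie the pieces together: embed a struct drm_mm, initialize it over the managed range, insert zeroed nodes with an insert mode, and remove every node before takedown. This is an illustrative sketch only, assuming a driver-side wrapper; the vram_* names, the mutex and the caller-supplied alignment are assumptions of the example, while every drm_mm_* call used here is declared in the header above.

#include <linux/mutex.h>
#include <linux/string.h>
#include <drm/drm_mm.h>

struct vram_heap {
	struct drm_mm mm;	/* embedded allocator, as the kerneldoc permits */
	struct mutex lock;	/* drm_mm itself does no locking */
};

static void vram_heap_init(struct vram_heap *heap, u64 start, u64 size)
{
	mutex_init(&heap->lock);
	drm_mm_init(&heap->mm, start, size);
}

static int vram_heap_alloc(struct vram_heap *heap, struct drm_mm_node *node,
			   u64 size, u64 align)
{
	int ret;

	/* Nodes must be cleared to 0 before their first insertion. */
	memset(node, 0, sizeof(*node));

	mutex_lock(&heap->lock);
	/* Pick the smallest suitable hole and allocate bottom-up. */
	ret = drm_mm_insert_node_generic(&heap->mm, node, size, align, 0,
					 DRM_MM_INSERT_BEST);
	mutex_unlock(&heap->lock);
	return ret;	/* 0 on success, -ENOSPC if no hole fits */
}

static void vram_heap_free(struct vram_heap *heap, struct drm_mm_node *node)
{
	mutex_lock(&heap->lock);
	if (drm_mm_node_allocated(node))
		drm_mm_remove_node(node);
	mutex_unlock(&heap->lock);
}

static void vram_heap_fini(struct vram_heap *heap)
{
	/* All nodes must be gone before takedown; drm_mm_clean() checks that. */
	WARN_ON(!drm_mm_clean(&heap->mm));
	drm_mm_takedown(&heap->mm);
}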
// SPDX-License-Identifier: GPL-2.0 /* * Implement CPU time clocks for the POSIX clock interface. */ #include <linux/sched/signal.h> #include <linux/sched/cputime.h> #include <linux/posix-timers.h> #include <linux/errno.h> #include <linux/math64.h> #include <linux/uaccess.h> #include <linux/kernel_stat.h> #include <trace/events/timer.h> #include <linux/tick.h> #include <linux/workqueue.h> #include <linux/compat.h> #include <linux/sched/deadline.h> #include <linux/task_work.h> #include "posix-timers.h" static void posix_cpu_timer_rearm(struct k_itimer *timer); void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) { posix_cputimers_init(pct); if (cpu_limit != RLIM_INFINITY) { pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC; pct->timers_active = true; } } /* * Called after updating RLIMIT_CPU to run cpu timer and update * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if * necessary. Needs siglock protection since other code may update the * expiration cache as well. * * Returns 0 on success, -ESRCH on failure. Can fail if the task is exiting and * we cannot lock_task_sighand. Cannot fail if task is current. */ int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) { u64 nsecs = rlim_new * NSEC_PER_SEC; unsigned long irq_fl; if (!lock_task_sighand(task, &irq_fl)) return -ESRCH; set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL); unlock_task_sighand(task, &irq_fl); return 0; } /* * Functions for validating access to tasks. */ static struct pid *pid_for_clock(const clockid_t clock, bool gettime) { const bool thread = !!CPUCLOCK_PERTHREAD(clock); const pid_t upid = CPUCLOCK_PID(clock); struct pid *pid; if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX) return NULL; /* * If the encoded PID is 0, then the timer is targeted at current * or the process to which current belongs. */ if (upid == 0) return thread ? task_pid(current) : task_tgid(current); pid = find_vpid(upid); if (!pid) return NULL; if (thread) { struct task_struct *tsk = pid_task(pid, PIDTYPE_PID); return (tsk && same_thread_group(tsk, current)) ? pid : NULL; } /* * For clock_gettime(PROCESS) allow finding the process * with the pid of the current task. The code needs the tgid * of the process so that pid_task(pid, PIDTYPE_TGID) can be * used to find the process. */ if (gettime && (pid == task_pid(current))) return task_tgid(current); /* * For processes require that pid identifies a process. */ return pid_has_task(pid, PIDTYPE_TGID) ? pid : NULL; } static inline int validate_clock_permissions(const clockid_t clock) { int ret; rcu_read_lock(); ret = pid_for_clock(clock, false) ? 0 : -EINVAL; rcu_read_unlock(); return ret; } static inline enum pid_type clock_pid_type(const clockid_t clock) { return CPUCLOCK_PERTHREAD(clock) ?
PIDTYPE_PID : PIDTYPE_TGID; } static inline struct task_struct *cpu_timer_task_rcu(struct k_itimer *timer) { return pid_task(timer->it.cpu.pid, clock_pid_type(timer->it_clock)); } /* * Update expiry time from increment, and increase overrun count, * given the current clock sample. */ static u64 bump_cpu_timer(struct k_itimer *timer, u64 now) { u64 delta, incr, expires = timer->it.cpu.node.expires; int i; if (!timer->it_interval) return expires; if (now < expires) return expires; incr = timer->it_interval; delta = now + incr - expires; /* Don't use (incr*2 < delta), incr*2 might overflow. */ for (i = 0; incr < delta - incr; i++) incr = incr << 1; for (; i >= 0; incr >>= 1, i--) { if (delta < incr) continue; timer->it.cpu.node.expires += incr; timer->it_overrun += 1LL << i; delta -= incr; } return timer->it.cpu.node.expires; } /* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */ static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct) { return !(~pct->bases[CPUCLOCK_PROF].nextevt | ~pct->bases[CPUCLOCK_VIRT].nextevt | ~pct->bases[CPUCLOCK_SCHED].nextevt); } static int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { int error = validate_clock_permissions(which_clock); if (!error) { tp->tv_sec = 0; tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ); if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { /* * If sched_clock is using a cycle counter, we * don't have any idea of its true resolution * exported, but it is much more than 1s/HZ. */ tp->tv_nsec = 1; } } return error; } static int posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp) { int error = validate_clock_permissions(clock); /* * You can never reset a CPU clock, but we check for other errors * in the call before failing with EPERM. */ return error ? : -EPERM; } /* * Sample a per-thread clock for the given task. clkid is validated. */ static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p) { u64 utime, stime; if (clkid == CPUCLOCK_SCHED) return task_sched_runtime(p); task_cputime(p, &utime, &stime); switch (clkid) { case CPUCLOCK_PROF: return utime + stime; case CPUCLOCK_VIRT: return utime; default: WARN_ON_ONCE(1); } return 0; } static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime) { samples[CPUCLOCK_PROF] = stime + utime; samples[CPUCLOCK_VIRT] = utime; samples[CPUCLOCK_SCHED] = rtime; } static void task_sample_cputime(struct task_struct *p, u64 *samples) { u64 stime, utime; task_cputime(p, &utime, &stime); store_samples(samples, stime, utime, p->se.sum_exec_runtime); } static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, u64 *samples) { u64 stime, utime, rtime; utime = atomic64_read(&at->utime); stime = atomic64_read(&at->stime); rtime = atomic64_read(&at->sum_exec_runtime); store_samples(samples, stime, utime, rtime); } /* * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg * to avoid race conditions with concurrent updates to cputime. 
*/ static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime) { u64 curr_cputime = atomic64_read(cputime); do { if (sum_cputime <= curr_cputime) return; } while (!atomic64_try_cmpxchg(cputime, &curr_cputime, sum_cputime)); } static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum) { __update_gt_cputime(&cputime_atomic->utime, sum->utime); __update_gt_cputime(&cputime_atomic->stime, sum->stime); __update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime); } /** * thread_group_sample_cputime - Sample cputime for a given task * @tsk: Task for which cputime needs to be started * @samples: Storage for time samples * * Called from sys_getitimer() to calculate the expiry time of an active * timer. That means group cputime accounting is already active. Called * with task sighand lock held. * * Updates @times with an uptodate sample of the thread group cputimes. */ void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples) { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; struct posix_cputimers *pct = &tsk->signal->posix_cputimers; WARN_ON_ONCE(!pct->timers_active); proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); } /** * thread_group_start_cputime - Start cputime and return a sample * @tsk: Task for which cputime needs to be started * @samples: Storage for time samples * * The thread group cputime accounting is avoided when there are no posix * CPU timers armed. Before starting a timer it's required to check whether * the time accounting is active. If not, a full update of the atomic * accounting store needs to be done and the accounting enabled. * * Updates @times with an uptodate sample of the thread group cputimes. */ static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples) { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; struct posix_cputimers *pct = &tsk->signal->posix_cputimers; lockdep_assert_task_sighand_held(tsk); /* Check if cputimer isn't running. This is accessed without locking. */ if (!READ_ONCE(pct->timers_active)) { struct task_cputime sum; /* * The POSIX timer interface allows for absolute time expiry * values through the TIMER_ABSTIME flag, therefore we have * to synchronize the timer to the clock every time we start it. */ thread_group_cputime(tsk, &sum); update_gt_cputime(&cputimer->cputime_atomic, &sum); /* * We're setting timers_active without a lock. Ensure this * only gets written to in one operation. We set it after * update_gt_cputime() as a small optimization, but * barriers are not required because update_gt_cputime() * can handle concurrent updates. */ WRITE_ONCE(pct->timers_active, true); } proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); } static void __thread_group_cputime(struct task_struct *tsk, u64 *samples) { struct task_cputime ct; thread_group_cputime(tsk, &ct); store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime); } /* * Sample a process (thread group) clock for the given task clkid. If the * group's cputime accounting is already enabled, read the atomic * store. Otherwise a full update is required. clkid is already validated. 
*/ static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p, bool start) { struct thread_group_cputimer *cputimer = &p->signal->cputimer; struct posix_cputimers *pct = &p->signal->posix_cputimers; u64 samples[CPUCLOCK_MAX]; if (!READ_ONCE(pct->timers_active)) { if (start) thread_group_start_cputime(p, samples); else __thread_group_cputime(p, samples); } else { proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); } return samples[clkid]; } static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp) { const clockid_t clkid = CPUCLOCK_WHICH(clock); struct task_struct *tsk; u64 t; rcu_read_lock(); tsk = pid_task(pid_for_clock(clock, true), clock_pid_type(clock)); if (!tsk) { rcu_read_unlock(); return -EINVAL; } if (CPUCLOCK_PERTHREAD(clock)) t = cpu_clock_sample(clkid, tsk); else t = cpu_clock_sample_group(clkid, tsk, false); rcu_read_unlock(); *tp = ns_to_timespec64(t); return 0; } /* * Validate the clockid_t for a new CPU-clock timer, and initialize the timer. * This is called from sys_timer_create() and do_cpu_nanosleep() with the * new timer already all-zeros initialized. */ static int posix_cpu_timer_create(struct k_itimer *new_timer) { static struct lock_class_key posix_cpu_timers_key; struct pid *pid; rcu_read_lock(); pid = pid_for_clock(new_timer->it_clock, false); if (!pid) { rcu_read_unlock(); return -EINVAL; } /* * If posix timer expiry is handled in task work context then * timer::it_lock can be taken without disabling interrupts as all * other locking happens in task context. This requires a separate * lock class key otherwise regular posix timer expiry would record * the lock class being taken in interrupt context and generate a * false positive warning. */ if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK)) lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key); new_timer->kclock = &clock_posix_cpu; timerqueue_init(&new_timer->it.cpu.node); new_timer->it.cpu.pid = get_pid(pid); rcu_read_unlock(); return 0; } static struct posix_cputimer_base *timer_base(struct k_itimer *timer, struct task_struct *tsk) { int clkidx = CPUCLOCK_WHICH(timer->it_clock); if (CPUCLOCK_PERTHREAD(timer->it_clock)) return tsk->posix_cputimers.bases + clkidx; else return tsk->signal->posix_cputimers.bases + clkidx; } /* * Force recalculating the base earliest expiration on the next tick. * This will also re-evaluate the need to keep around the process wide * cputime counter and tick dependency and eventually shut these down * if necessary. */ static void trigger_base_recalc_expires(struct k_itimer *timer, struct task_struct *tsk) { struct posix_cputimer_base *base = timer_base(timer, tsk); base->nextevt = 0; } /* * Dequeue the timer and reset the base if it was its earliest expiration. * It makes sure the next tick recalculates the base next expiration so we * don't keep the costly process wide cputime counter around for a random * amount of time, along with the tick dependency. * * If another timer gets queued between this and the next tick, its * expiration will update the base next event if necessary on the next * tick. */ static void disarm_timer(struct k_itimer *timer, struct task_struct *p) { struct cpu_timer *ctmr = &timer->it.cpu; struct posix_cputimer_base *base; if (!cpu_timer_dequeue(ctmr)) return; base = timer_base(timer, p); if (cpu_timer_getexpires(ctmr) == base->nextevt) trigger_base_recalc_expires(timer, p); } /* * Clean up a CPU-clock timer that is about to be destroyed. 
* This is called from timer deletion with the timer already locked. * If we return TIMER_RETRY, it's necessary to release the timer's lock * and try again. (This happens when the timer is in the middle of firing.) */ static int posix_cpu_timer_del(struct k_itimer *timer) { struct cpu_timer *ctmr = &timer->it.cpu; struct sighand_struct *sighand; struct task_struct *p; unsigned long flags; int ret = 0; rcu_read_lock(); p = cpu_timer_task_rcu(timer); if (!p) goto out; /* * Protect against sighand release/switch in exit/exec and process/ * thread timer list entry concurrent read/writes. */ sighand = lock_task_sighand(p, &flags); if (unlikely(sighand == NULL)) { /* * This raced with the reaping of the task. The exit cleanup * should have removed this timer from the timer queue. */ WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node)); } else { if (timer->it.cpu.firing) { /* * Prevent signal delivery. The timer cannot be dequeued * because it is on the firing list which is not protected * by sighand->lock. The delivery path is waiting for * the timer lock. So go back, unlock and retry. */ timer->it.cpu.firing = false; ret = TIMER_RETRY; } else { disarm_timer(timer, p); } unlock_task_sighand(p, &flags); } out: rcu_read_unlock(); if (!ret) { put_pid(ctmr->pid); timer->it_status = POSIX_TIMER_DISARMED; } return ret; } static void cleanup_timerqueue(struct timerqueue_head *head) { struct timerqueue_node *node; struct cpu_timer *ctmr; while ((node = timerqueue_getnext(head))) { timerqueue_del(head, node); ctmr = container_of(node, struct cpu_timer, node); ctmr->head = NULL; } } /* * Clean out CPU timers which are still armed when a thread exits. The * timers are only removed from the list. No other updates are done. The * corresponding posix timers are still accessible, but cannot be rearmed. * * This must be called with the siglock held. */ static void cleanup_timers(struct posix_cputimers *pct) { cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead); cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead); cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead); } /* * These are both called with the siglock held, when the current thread * is being reaped. When the final (leader) thread in the group is reaped, * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit. */ void posix_cpu_timers_exit(struct task_struct *tsk) { cleanup_timers(&tsk->posix_cputimers); } void posix_cpu_timers_exit_group(struct task_struct *tsk) { cleanup_timers(&tsk->signal->posix_cputimers); } /* * Insert the timer on the appropriate list before any timers that * expire later. This must be called with the sighand lock held. */ static void arm_timer(struct k_itimer *timer, struct task_struct *p) { struct posix_cputimer_base *base = timer_base(timer, p); struct cpu_timer *ctmr = &timer->it.cpu; u64 newexp = cpu_timer_getexpires(ctmr); timer->it_status = POSIX_TIMER_ARMED; if (!cpu_timer_enqueue(&base->tqhead, ctmr)) return; /* * We are the new earliest-expiring POSIX 1.b timer, hence * need to update expiration cache. Take into account that * for process timers we share expiration cache with itimers * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME. */ if (newexp < base->nextevt) base->nextevt = newexp; if (CPUCLOCK_PERTHREAD(timer->it_clock)) tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER); else tick_dep_set_signal(p, TICK_DEP_BIT_POSIX_TIMER); } /* * The timer is locked, fire it and arrange for its reload. 
*/ static void cpu_timer_fire(struct k_itimer *timer) { struct cpu_timer *ctmr = &timer->it.cpu; timer->it_status = POSIX_TIMER_DISARMED; if (unlikely(ctmr->nanosleep)) { /* * This a special case for clock_nanosleep, * not a normal timer from sys_timer_create. */ wake_up_process(timer->it_process); cpu_timer_setexpires(ctmr, 0); } else { posix_timer_queue_signal(timer); /* Disable oneshot timers */ if (!timer->it_interval) cpu_timer_setexpires(ctmr, 0); } } static void __posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp, u64 now); /* * Guts of sys_timer_settime for CPU timers. * This is called with the timer locked and interrupts disabled. * If we return TIMER_RETRY, it's necessary to release the timer's lock * and try again. (This happens when the timer is in the middle of firing.) */ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, struct itimerspec64 *new, struct itimerspec64 *old) { bool sigev_none = timer->it_sigev_notify == SIGEV_NONE; clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); struct cpu_timer *ctmr = &timer->it.cpu; u64 old_expires, new_expires, now; struct sighand_struct *sighand; struct task_struct *p; unsigned long flags; int ret = 0; rcu_read_lock(); p = cpu_timer_task_rcu(timer); if (!p) { /* * If p has just been reaped, we can no * longer get any information about it at all. */ rcu_read_unlock(); return -ESRCH; } /* * Use the to_ktime conversion because that clamps the maximum * value to KTIME_MAX and avoid multiplication overflows. */ new_expires = ktime_to_ns(timespec64_to_ktime(new->it_value)); /* * Protect against sighand release/switch in exit/exec and p->cpu_timers * and p->signal->cpu_timers read/write in arm_timer() */ sighand = lock_task_sighand(p, &flags); /* * If p has just been reaped, we can no * longer get any information about it at all. */ if (unlikely(sighand == NULL)) { rcu_read_unlock(); return -ESRCH; } /* Retrieve the current expiry time before disarming the timer */ old_expires = cpu_timer_getexpires(ctmr); if (unlikely(timer->it.cpu.firing)) { /* * Prevent signal delivery. The timer cannot be dequeued * because it is on the firing list which is not protected * by sighand->lock. The delivery path is waiting for * the timer lock. So go back, unlock and retry. */ timer->it.cpu.firing = false; ret = TIMER_RETRY; } else { cpu_timer_dequeue(ctmr); timer->it_status = POSIX_TIMER_DISARMED; } /* * Sample the current clock for saving the previous setting * and for rearming the timer. */ if (CPUCLOCK_PERTHREAD(timer->it_clock)) now = cpu_clock_sample(clkid, p); else now = cpu_clock_sample_group(clkid, p, !sigev_none); /* Retrieve the previous expiry value if requested. */ if (old) { old->it_value = (struct timespec64){ }; if (old_expires) __posix_cpu_timer_get(timer, old, now); } /* Retry if the timer expiry is running concurrently */ if (unlikely(ret)) { unlock_task_sighand(p, &flags); goto out; } /* Convert relative expiry time to absolute */ if (new_expires && !(timer_flags & TIMER_ABSTIME)) new_expires += now; /* Set the new expiry time (might be 0) */ cpu_timer_setexpires(ctmr, new_expires); /* * Arm the timer if it is not disabled, the new expiry value has * not yet expired and the timer requires signal delivery. * SIGEV_NONE timers are never armed. In case the timer is not * armed, enforce the reevaluation of the timer base so that the * process wide cputime counter can be disabled eventually. 
*/ if (likely(!sigev_none)) { if (new_expires && now < new_expires) arm_timer(timer, p); else trigger_base_recalc_expires(timer, p); } unlock_task_sighand(p, &flags); posix_timer_set_common(timer, new); /* * If the new expiry time was already in the past the timer was not * queued. Fire it immediately even if the thread never runs to * accumulate more time on this clock. */ if (!sigev_none && new_expires && now >= new_expires) cpu_timer_fire(timer); out: rcu_read_unlock(); return ret; } static void __posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp, u64 now) { bool sigev_none = timer->it_sigev_notify == SIGEV_NONE; u64 expires, iv = timer->it_interval; /* * Make sure that interval timers are moved forward for the * following cases: * - SIGEV_NONE timers which are never armed * - Timers which expired, but the signal has not yet been * delivered */ if (iv && timer->it_status != POSIX_TIMER_ARMED) expires = bump_cpu_timer(timer, now); else expires = cpu_timer_getexpires(&timer->it.cpu); /* * Expired interval timers cannot have a remaining time <= 0. * The kernel has to move them forward so that the next * timer expiry is > @now. */ if (now < expires) { itp->it_value = ns_to_timespec64(expires - now); } else { /* * A single shot SIGEV_NONE timer must return 0, when it is * expired! Timers which have a real signal delivery mode * must return a remaining time greater than 0 because the * signal has not yet been delivered. */ if (!sigev_none) itp->it_value.tv_nsec = 1; } } static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp) { clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); struct task_struct *p; u64 now; rcu_read_lock(); p = cpu_timer_task_rcu(timer); if (p && cpu_timer_getexpires(&timer->it.cpu)) { itp->it_interval = ktime_to_timespec64(timer->it_interval); if (CPUCLOCK_PERTHREAD(timer->it_clock)) now = cpu_clock_sample(clkid, p); else now = cpu_clock_sample_group(clkid, p, false); __posix_cpu_timer_get(timer, itp, now); } rcu_read_unlock(); } #define MAX_COLLECTED 20 static u64 collect_timerqueue(struct timerqueue_head *head, struct list_head *firing, u64 now) { struct timerqueue_node *next; int i = 0; while ((next = timerqueue_getnext(head))) { struct cpu_timer *ctmr; u64 expires; ctmr = container_of(next, struct cpu_timer, node); expires = cpu_timer_getexpires(ctmr); /* Limit the number of timers to expire at once */ if (++i == MAX_COLLECTED || now < expires) return expires; ctmr->firing = true; /* See posix_cpu_timer_wait_running() */ rcu_assign_pointer(ctmr->handling, current); cpu_timer_dequeue(ctmr); list_add_tail(&ctmr->elist, firing); } return U64_MAX; } static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, struct list_head *firing) { struct posix_cputimer_base *base = pct->bases; int i; for (i = 0; i < CPUCLOCK_MAX; i++, base++) { base->nextevt = collect_timerqueue(&base->tqhead, firing, samples[i]); } } static inline void check_dl_overrun(struct task_struct *tsk) { if (tsk->dl.dl_overrun) { tsk->dl.dl_overrun = 0; send_signal_locked(SIGXCPU, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); } } static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) { if (time < limit) return false; if (print_fatal_signals) { pr_info("%s Watchdog Timeout (%s): %s[%d]\n", rt ? "RT" : "CPU", hard ? 
"hard" : "soft", current->comm, task_pid_nr(current)); } send_signal_locked(signo, SEND_SIG_PRIV, current, PIDTYPE_TGID); return true; } /* * Check for any per-thread CPU timers that have fired and move them off * the tsk->cpu_timers[N] list onto the firing list. Here we update the * tsk->it_*_expires values to reflect the remaining thread CPU timers. */ static void check_thread_timers(struct task_struct *tsk, struct list_head *firing) { struct posix_cputimers *pct = &tsk->posix_cputimers; u64 samples[CPUCLOCK_MAX]; unsigned long soft; if (dl_task(tsk)) check_dl_overrun(tsk); if (expiry_cache_is_inactive(pct)) return; task_sample_cputime(tsk, samples); collect_posix_cputimers(pct, samples, firing); /* * Check for the special case thread timers. */ soft = task_rlimit(tsk, RLIMIT_RTTIME); if (soft != RLIM_INFINITY) { /* Task RT timeout is accounted in jiffies. RTTIME is usec */ unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); /* At the hard limit, send SIGKILL. No further action. */ if (hard != RLIM_INFINITY && check_rlimit(rttime, hard, SIGKILL, true, true)) return; /* At the soft limit, send a SIGXCPU every second */ if (check_rlimit(rttime, soft, SIGXCPU, true, false)) { soft += USEC_PER_SEC; tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft; } } if (expiry_cache_is_inactive(pct)) tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER); } static inline void stop_process_timers(struct signal_struct *sig) { struct posix_cputimers *pct = &sig->posix_cputimers; /* Turn off the active flag. This is done without locking. */ WRITE_ONCE(pct->timers_active, false); tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER); } static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, u64 *expires, u64 cur_time, int signo) { if (!it->expires) return; if (cur_time >= it->expires) { if (it->incr) it->expires += it->incr; else it->expires = 0; trace_itimer_expire(signo == SIGPROF ? ITIMER_PROF : ITIMER_VIRTUAL, task_tgid(tsk), cur_time); send_signal_locked(signo, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); } if (it->expires && it->expires < *expires) *expires = it->expires; } /* * Check for any per-thread CPU timers that have fired and move them * off the tsk->*_timers list onto the firing list. Per-thread timers * have already been taken off. */ static void check_process_timers(struct task_struct *tsk, struct list_head *firing) { struct signal_struct *const sig = tsk->signal; struct posix_cputimers *pct = &sig->posix_cputimers; u64 samples[CPUCLOCK_MAX]; unsigned long soft; /* * If there are no active process wide timers (POSIX 1.b, itimers, * RLIMIT_CPU) nothing to check. Also skip the process wide timer * processing when there is already another task handling them. */ if (!READ_ONCE(pct->timers_active) || pct->expiry_active) return; /* * Signify that a thread is checking for process timers. * Write access to this field is protected by the sighand lock. */ pct->expiry_active = true; /* * Collect the current process totals. Group accounting is active * so the sample can be taken directly. */ proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples); collect_posix_cputimers(pct, samples, firing); /* * Check for the special case process timers. 
*/ check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &pct->bases[CPUCLOCK_PROF].nextevt, samples[CPUCLOCK_PROF], SIGPROF); check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &pct->bases[CPUCLOCK_VIRT].nextevt, samples[CPUCLOCK_VIRT], SIGVTALRM); soft = task_rlimit(tsk, RLIMIT_CPU); if (soft != RLIM_INFINITY) { /* RLIMIT_CPU is in seconds. Samples are nanoseconds */ unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU); u64 ptime = samples[CPUCLOCK_PROF]; u64 softns = (u64)soft * NSEC_PER_SEC; u64 hardns = (u64)hard * NSEC_PER_SEC; /* At the hard limit, send SIGKILL. No further action. */ if (hard != RLIM_INFINITY && check_rlimit(ptime, hardns, SIGKILL, false, true)) return; /* At the soft limit, send a SIGXCPU every second */ if (check_rlimit(ptime, softns, SIGXCPU, false, false)) { sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1; softns += NSEC_PER_SEC; } /* Update the expiry cache */ if (softns < pct->bases[CPUCLOCK_PROF].nextevt) pct->bases[CPUCLOCK_PROF].nextevt = softns; } if (expiry_cache_is_inactive(pct)) stop_process_timers(sig); pct->expiry_active = false; } /* * This is called from the signal code (via posixtimer_rearm) * when the last timer signal was delivered and we have to reload the timer. */ static void posix_cpu_timer_rearm(struct k_itimer *timer) { clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); struct task_struct *p; struct sighand_struct *sighand; unsigned long flags; u64 now; rcu_read_lock(); p = cpu_timer_task_rcu(timer); if (!p) goto out; /* Protect timer list r/w in arm_timer() */ sighand = lock_task_sighand(p, &flags); if (unlikely(sighand == NULL)) goto out; /* * Fetch the current sample and update the timer's expiry time. */ if (CPUCLOCK_PERTHREAD(timer->it_clock)) now = cpu_clock_sample(clkid, p); else now = cpu_clock_sample_group(clkid, p, true); bump_cpu_timer(timer, now); /* * Now re-arm for the new expiry time. */ arm_timer(timer, p); unlock_task_sighand(p, &flags); out: rcu_read_unlock(); } /** * task_cputimers_expired - Check whether posix CPU timers are expired * * @samples: Array of current samples for the CPUCLOCK clocks * @pct: Pointer to a posix_cputimers container * * Returns true if any member of @samples is greater than the corresponding * member of @pct->bases[CLK].nextevt. False otherwise */ static inline bool task_cputimers_expired(const u64 *samples, struct posix_cputimers *pct) { int i; for (i = 0; i < CPUCLOCK_MAX; i++) { if (samples[i] >= pct->bases[i].nextevt) return true; } return false; } /** * fastpath_timer_check - POSIX CPU timers fast path. * * @tsk: The task (thread) being checked. * * Check the task and thread group timers. If both are zero (there are no * timers set) return false. Otherwise snapshot the task and thread group * timers and compare them with the corresponding expiration times. Return * true if a timer has expired, else return false. */ static inline bool fastpath_timer_check(struct task_struct *tsk) { struct posix_cputimers *pct = &tsk->posix_cputimers; struct signal_struct *sig; if (!expiry_cache_is_inactive(pct)) { u64 samples[CPUCLOCK_MAX]; task_sample_cputime(tsk, samples); if (task_cputimers_expired(samples, pct)) return true; } sig = tsk->signal; pct = &sig->posix_cputimers; /* * Check if thread group timers expired when timers are active and * no other thread in the group is already handling expiry for * thread group cputimers. These fields are read without the * sighand lock. 
However, this is fine because this is meant to be * a fastpath heuristic to determine whether we should try to * acquire the sighand lock to handle timer expiry. * * In the worst case scenario, if concurrently timers_active is set * or expiry_active is cleared, but the current thread doesn't see * the change yet, the timer checks are delayed until the next * thread in the group gets a scheduler interrupt to handle the * timer. This isn't an issue in practice because these types of * delays with signals actually getting sent are expected. */ if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) { u64 samples[CPUCLOCK_MAX]; proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples); if (task_cputimers_expired(samples, pct)) return true; } if (dl_task(tsk) && tsk->dl.dl_overrun) return true; return false; } static void handle_posix_cpu_timers(struct task_struct *tsk); #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK static void posix_cpu_timers_work(struct callback_head *work) { struct posix_cputimers_work *cw = container_of(work, typeof(*cw), work); mutex_lock(&cw->mutex); handle_posix_cpu_timers(current); mutex_unlock(&cw->mutex); } /* * Invoked from the posix-timer core when a cancel operation failed because * the timer is marked firing. The caller holds rcu_read_lock(), which * protects the timer and the task which is expiring it from being freed. */ static void posix_cpu_timer_wait_running(struct k_itimer *timr) { struct task_struct *tsk = rcu_dereference(timr->it.cpu.handling); /* Has the handling task completed expiry already? */ if (!tsk) return; /* Ensure that the task cannot go away */ get_task_struct(tsk); /* Now drop the RCU protection so the mutex can be locked */ rcu_read_unlock(); /* Wait on the expiry mutex */ mutex_lock(&tsk->posix_cputimers_work.mutex); /* Release it immediately again. */ mutex_unlock(&tsk->posix_cputimers_work.mutex); /* Drop the task reference. */ put_task_struct(tsk); /* Relock RCU so the callsite is balanced */ rcu_read_lock(); } static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr) { /* Ensure that timr->it.cpu.handling task cannot go away */ rcu_read_lock(); spin_unlock_irq(&timr->it_lock); posix_cpu_timer_wait_running(timr); rcu_read_unlock(); /* @timr is on stack and is valid */ spin_lock_irq(&timr->it_lock); } /* * Clear existing posix CPU timers task work. */ void clear_posix_cputimers_work(struct task_struct *p) { /* * A copied work entry from the old task is not meaningful, clear it. * N.B. init_task_work will not do this. */ memset(&p->posix_cputimers_work.work, 0, sizeof(p->posix_cputimers_work.work)); init_task_work(&p->posix_cputimers_work.work, posix_cpu_timers_work); mutex_init(&p->posix_cputimers_work.mutex); p->posix_cputimers_work.scheduled = false; } /* * Initialize posix CPU timers task work in init task. Out of line to * keep the callback static and to avoid header recursion hell. */ void __init posix_cputimers_init_work(void) { clear_posix_cputimers_work(current); } /* * Note: All operations on tsk->posix_cputimer_work.scheduled happen either * in hard interrupt context or in task context with interrupts * disabled. Aside of that the writer/reader interaction is always in the * context of the current task, which means they are strict per CPU. 
*/ static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk) { return tsk->posix_cputimers_work.scheduled; } static inline void __run_posix_cpu_timers(struct task_struct *tsk) { if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled)) return; /* Schedule task work to actually expire the timers */ tsk->posix_cputimers_work.scheduled = true; task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME); } static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk, unsigned long start) { bool ret = true; /* * On !RT kernels interrupts are disabled while collecting expired * timers, so no tick can happen and the fast path check can be * reenabled without further checks. */ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { tsk->posix_cputimers_work.scheduled = false; return true; } /* * On RT enabled kernels ticks can happen while the expired timers * are collected under sighand lock. But any tick which observes * the CPUTIMERS_WORK_SCHEDULED bit set, does not run the fastpath * checks. So reenabling the tick work has do be done carefully: * * Disable interrupts and run the fast path check if jiffies have * advanced since the collecting of expired timers started. If * jiffies have not advanced or the fast path check did not find * newly expired timers, reenable the fast path check in the timer * interrupt. If there are newly expired timers, return false and * let the collection loop repeat. */ local_irq_disable(); if (start != jiffies && fastpath_timer_check(tsk)) ret = false; else tsk->posix_cputimers_work.scheduled = false; local_irq_enable(); return ret; } #else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */ static inline void __run_posix_cpu_timers(struct task_struct *tsk) { lockdep_posixtimer_enter(); handle_posix_cpu_timers(tsk); lockdep_posixtimer_exit(); } static void posix_cpu_timer_wait_running(struct k_itimer *timr) { cpu_relax(); } static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr) { spin_unlock_irq(&timr->it_lock); cpu_relax(); spin_lock_irq(&timr->it_lock); } static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk) { return false; } static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk, unsigned long start) { return true; } #endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */ static void handle_posix_cpu_timers(struct task_struct *tsk) { struct k_itimer *timer, *next; unsigned long flags, start; LIST_HEAD(firing); if (!lock_task_sighand(tsk, &flags)) return; do { /* * On RT locking sighand lock does not disable interrupts, * so this needs to be careful vs. ticks. Store the current * jiffies value. */ start = READ_ONCE(jiffies); barrier(); /* * Here we take off tsk->signal->cpu_timers[N] and * tsk->cpu_timers[N] all the timers that are firing, and * put them on the firing list. */ check_thread_timers(tsk, &firing); check_process_timers(tsk, &firing); /* * The above timer checks have updated the expiry cache and * because nothing can have queued or modified timers after * sighand lock was taken above it is guaranteed to be * consistent. So the next timer interrupt fastpath check * will find valid data. * * If timer expiry runs in the timer interrupt context then * the loop is not relevant as timers will be directly * expired in interrupt context. The stub function below * returns always true which allows the compiler to * optimize the loop out. 
* * If timer expiry is deferred to task work context then * the following rules apply: * * - On !RT kernels no tick can have happened on this CPU * after sighand lock was acquired because interrupts are * disabled. So reenabling task work before dropping * sighand lock and reenabling interrupts is race free. * * - On RT kernels ticks might have happened but the tick * work ignored posix CPU timer handling because the * CPUTIMERS_WORK_SCHEDULED bit is set. Reenabling work * must be done very carefully including a check whether * ticks have happened since the start of the timer * expiry checks. posix_cpu_timers_enable_work() takes * care of that and eventually lets the expiry checks * run again. */ } while (!posix_cpu_timers_enable_work(tsk, start)); /* * We must release sighand lock before taking any timer's lock. * There is a potential race with timer deletion here, as the * siglock now protects our private firing list. We have set * the firing flag in each timer, so that a deletion attempt * that gets the timer lock before we do will give it up and * spin until we've taken care of that timer below. */ unlock_task_sighand(tsk, &flags); /* * Now that all the timers on our list have the firing flag, * no one will touch their list entries but us. We'll take * each timer's lock before clearing its firing flag, so no * timer call will interfere. */ list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) { bool cpu_firing; /* * spin_lock() is sufficient here even independent of the * expiry context. If expiry happens in hard interrupt * context it's obvious. For task work context it's safe * because all other operations on timer::it_lock happen in * task context (syscall or exit). */ spin_lock(&timer->it_lock); list_del_init(&timer->it.cpu.elist); cpu_firing = timer->it.cpu.firing; timer->it.cpu.firing = false; /* * If the firing flag is cleared then this raced with a * timer rearm/delete operation. So don't generate an * event. */ if (likely(cpu_firing)) cpu_timer_fire(timer); /* See posix_cpu_timer_wait_running() */ rcu_assign_pointer(timer->it.cpu.handling, NULL); spin_unlock(&timer->it_lock); } } /* * This is called from the timer interrupt handler. The irq handler has * already updated our counts. We need to check if any timers fire now. * Interrupts are disabled. */ void run_posix_cpu_timers(void) { struct task_struct *tsk = current; lockdep_assert_irqs_disabled(); /* * If the actual expiry is deferred to task work context and the * work is already scheduled there is no point to do anything here. */ if (posix_cpu_timers_work_scheduled(tsk)) return; /* * The fast path checks that there are no expired thread or thread * group timers. If that's so, just return. */ if (!fastpath_timer_check(tsk)) return; __run_posix_cpu_timers(tsk); } /* * Set one of the process-wide special case CPU timers or RLIMIT_CPU. * The tsk->sighand->siglock must be held by the caller. */ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid, u64 *newval, u64 *oldval) { u64 now, *nextevt; if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED)) return; nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt; now = cpu_clock_sample_group(clkid, tsk, true); if (oldval) { /* * We are setting itimer. The *oldval is absolute and we update * it to be relative, *newval argument is relative and we update * it to be absolute. */ if (*oldval) { if (*oldval <= now) { /* Just about to fire. 
*/ *oldval = TICK_NSEC; } else { *oldval -= now; } } if (*newval) *newval += now; } /* * Update expiration cache if this is the earliest timer. CPUCLOCK_PROF * expiry cache is also used by RLIMIT_CPU!. */ if (*newval < *nextevt) *nextevt = *newval; tick_dep_set_signal(tsk, TICK_DEP_BIT_POSIX_TIMER); } static int do_cpu_nanosleep(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) { struct itimerspec64 it; struct k_itimer timer; u64 expires; int error; /* * Set up a temporary timer and then wait for it to go off. */ memset(&timer, 0, sizeof timer); spin_lock_init(&timer.it_lock); timer.it_clock = which_clock; timer.it_overrun = -1; error = posix_cpu_timer_create(&timer); timer.it_process = current; timer.it.cpu.nanosleep = true; if (!error) { static struct itimerspec64 zero_it; struct restart_block *restart; memset(&it, 0, sizeof(it)); it.it_value = *rqtp; spin_lock_irq(&timer.it_lock); error = posix_cpu_timer_set(&timer, flags, &it, NULL); if (error) { spin_unlock_irq(&timer.it_lock); return error; } while (!signal_pending(current)) { if (!cpu_timer_getexpires(&timer.it.cpu)) { /* * Our timer fired and was reset, below * deletion can not fail. */ posix_cpu_timer_del(&timer); spin_unlock_irq(&timer.it_lock); return 0; } /* * Block until cpu_timer_fire (or a signal) wakes us. */ __set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&timer.it_lock); schedule(); spin_lock_irq(&timer.it_lock); } /* * We were interrupted by a signal. */ expires = cpu_timer_getexpires(&timer.it.cpu); error = posix_cpu_timer_set(&timer, 0, &zero_it, &it); if (!error) { /* Timer is now unarmed, deletion can not fail. */ posix_cpu_timer_del(&timer); } else { while (error == TIMER_RETRY) { posix_cpu_timer_wait_running_nsleep(&timer); error = posix_cpu_timer_del(&timer); } } spin_unlock_irq(&timer.it_lock); if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) { /* * It actually did fire already. */ return 0; } error = -ERESTART_RESTARTBLOCK; /* * Report back to the user the time still remaining. */ restart = &current->restart_block; restart->nanosleep.expires = expires; if (restart->nanosleep.type != TT_NONE) error = nanosleep_copyout(restart, &it.it_value); } return error; } static long posix_cpu_nsleep_restart(struct restart_block *restart_block); static int posix_cpu_nsleep(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) { struct restart_block *restart_block = &current->restart_block; int error; /* * Diagnose required errors first.
*/ if (CPUCLOCK_PERTHREAD(which_clock) && (CPUCLOCK_PID(which_clock) == 0 || CPUCLOCK_PID(which_clock) == task_pid_vnr(current))) return -EINVAL; error = do_cpu_nanosleep(which_clock, flags, rqtp); if (error == -ERESTART_RESTARTBLOCK) { if (flags & TIMER_ABSTIME) return -ERESTARTNOHAND; restart_block->nanosleep.clockid = which_clock; set_restart_fn(restart_block, posix_cpu_nsleep_restart); } return error; } static long posix_cpu_nsleep_restart(struct restart_block *restart_block) { clockid_t which_clock = restart_block->nanosleep.clockid; struct timespec64 t; t = ns_to_timespec64(restart_block->nanosleep.expires); return do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t); } #define PROCESS_CLOCK make_process_cpuclock(0, CPUCLOCK_SCHED) #define THREAD_CLOCK make_thread_cpuclock(0, CPUCLOCK_SCHED) static int process_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { return posix_cpu_clock_getres(PROCESS_CLOCK, tp); } static int process_cpu_clock_get(const clockid_t which_clock, struct timespec64 *tp) { return posix_cpu_clock_get(PROCESS_CLOCK, tp); } static int process_cpu_timer_create(struct k_itimer *timer) { timer->it_clock = PROCESS_CLOCK; return posix_cpu_timer_create(timer); } static int process_cpu_nsleep(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) { return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp); } static int thread_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { return posix_cpu_clock_getres(THREAD_CLOCK, tp); } static int thread_cpu_clock_get(const clockid_t which_clock, struct timespec64 *tp) { return posix_cpu_clock_get(THREAD_CLOCK, tp); } static int thread_cpu_timer_create(struct k_itimer *timer) { timer->it_clock = THREAD_CLOCK; return posix_cpu_timer_create(timer); } const struct k_clock clock_posix_cpu = { .clock_getres = posix_cpu_clock_getres, .clock_set = posix_cpu_clock_set, .clock_get_timespec = posix_cpu_clock_get, .timer_create = posix_cpu_timer_create, .nsleep = posix_cpu_nsleep, .timer_set = posix_cpu_timer_set, .timer_del = posix_cpu_timer_del, .timer_get = posix_cpu_timer_get, .timer_rearm = posix_cpu_timer_rearm, .timer_wait_running = posix_cpu_timer_wait_running, }; const struct k_clock clock_process = { .clock_getres = process_cpu_clock_getres, .clock_get_timespec = process_cpu_clock_get, .timer_create = process_cpu_timer_create, .nsleep = process_cpu_nsleep, }; const struct k_clock clock_thread = { .clock_getres = thread_cpu_clock_getres, .clock_get_timespec = thread_cpu_clock_get, .timer_create = thread_cpu_timer_create, }; |
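/*
 * Hedged illustration, not part of kernel/time/posix-cpu-timers.c above: a
 * minimal userspace sketch of how the process-wide CPU clock served by
 * clock_process is exercised. timer_create() on CLOCK_PROCESS_CPUTIME_ID
 * ends up in process_cpu_timer_create(), and expiry runs through
 * handle_posix_cpu_timers(). Uses the usual librt timer API; error handling
 * is minimal.
 */
#include <signal.h>
#include <stdio.h>
#include <time.h>

static volatile sig_atomic_t fired;

static void on_alarm(int sig)
{
	(void)sig;
	fired = 1;
}

int main(void)
{
	struct sigevent sev = {
		.sigev_notify = SIGEV_SIGNAL,
		.sigev_signo  = SIGALRM,
	};
	struct itimerspec its = {
		.it_value.tv_nsec = 100 * 1000 * 1000,	/* 100ms of CPU time */
	};
	timer_t timer;

	signal(SIGALRM, on_alarm);

	/* Timer measured against CPU time consumed by the whole process. */
	if (timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &timer) ||
	    timer_settime(timer, 0, &its, NULL))
		return 1;

	/* Spin so the process actually accumulates CPU time and the timer fires. */
	while (!fired)
		;

	printf("process CPU-time timer fired\n");
	return 0;
}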
| 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 | // SPDX-License-Identifier: GPL-2.0 /* * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum * * Copyright 2018 Google LLC */ /* * "NHPoly1305" is the main component of Adiantum hashing. * Specifically, it is the calculation * * H_L ← Poly1305_{K_L}(NH_{K_N}(pad_{128}(L))) * * from the procedure in section 6.4 of the Adiantum paper [1]. It is an * ε-almost-∆-universal (ε-∆U) hash function for equal-length inputs over * Z/(2^{128}Z), where the "∆" operation is addition. It hashes 1024-byte * chunks of the input with the NH hash function [2], reducing the input length * by 32x. The resulting NH digests are evaluated as a polynomial in * GF(2^{130}-5), like in the Poly1305 MAC [3]. Note that the polynomial * evaluation by itself would suffice to achieve the ε-∆U property; NH is used * for performance since it's over twice as fast as Poly1305. * * This is *not* a cryptographic hash function; do not use it as such! 
* * [1] Adiantum: length-preserving encryption for entry-level processors * (https://eprint.iacr.org/2018/720.pdf) * [2] UMAC: Fast and Secure Message Authentication * (https://fastcrypto.org/umac/umac_proc.pdf) * [3] The Poly1305-AES message-authentication code * (https://cr.yp.to/mac/poly1305-20050329.pdf) */ #include <linux/unaligned.h> #include <crypto/algapi.h> #include <crypto/internal/hash.h> #include <crypto/internal/poly1305.h> #include <crypto/nhpoly1305.h> #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/module.h> static void nh_generic(const u32 *key, const u8 *message, size_t message_len, __le64 hash[NH_NUM_PASSES]) { u64 sums[4] = { 0, 0, 0, 0 }; BUILD_BUG_ON(NH_PAIR_STRIDE != 2); BUILD_BUG_ON(NH_NUM_PASSES != 4); while (message_len) { u32 m0 = get_unaligned_le32(message + 0); u32 m1 = get_unaligned_le32(message + 4); u32 m2 = get_unaligned_le32(message + 8); u32 m3 = get_unaligned_le32(message + 12); sums[0] += (u64)(u32)(m0 + key[ 0]) * (u32)(m2 + key[ 2]); sums[1] += (u64)(u32)(m0 + key[ 4]) * (u32)(m2 + key[ 6]); sums[2] += (u64)(u32)(m0 + key[ 8]) * (u32)(m2 + key[10]); sums[3] += (u64)(u32)(m0 + key[12]) * (u32)(m2 + key[14]); sums[0] += (u64)(u32)(m1 + key[ 1]) * (u32)(m3 + key[ 3]); sums[1] += (u64)(u32)(m1 + key[ 5]) * (u32)(m3 + key[ 7]); sums[2] += (u64)(u32)(m1 + key[ 9]) * (u32)(m3 + key[11]); sums[3] += (u64)(u32)(m1 + key[13]) * (u32)(m3 + key[15]); key += NH_MESSAGE_UNIT / sizeof(key[0]); message += NH_MESSAGE_UNIT; message_len -= NH_MESSAGE_UNIT; } hash[0] = cpu_to_le64(sums[0]); hash[1] = cpu_to_le64(sums[1]); hash[2] = cpu_to_le64(sums[2]); hash[3] = cpu_to_le64(sums[3]); } /* Pass the next NH hash value through Poly1305 */ static void process_nh_hash_value(struct nhpoly1305_state *state, const struct nhpoly1305_key *key) { BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0); poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash, NH_HASH_BYTES / POLY1305_BLOCK_SIZE, 1); } /* * Feed the next portion of the source data, as a whole number of 16-byte * "NH message units", through NH and Poly1305. Each NH hash is taken over * 1024 bytes, except possibly the final one which is taken over a multiple of * 16 bytes up to 1024. Also, in the case where data is passed in misaligned * chunks, we combine partial hashes; the end result is the same either way. 
*/ static void nhpoly1305_units(struct nhpoly1305_state *state, const struct nhpoly1305_key *key, const u8 *src, unsigned int srclen, nh_t nh_fn) { do { unsigned int bytes; if (state->nh_remaining == 0) { /* Starting a new NH message */ bytes = min_t(unsigned int, srclen, NH_MESSAGE_BYTES); nh_fn(key->nh_key, src, bytes, state->nh_hash); state->nh_remaining = NH_MESSAGE_BYTES - bytes; } else { /* Continuing a previous NH message */ __le64 tmp_hash[NH_NUM_PASSES]; unsigned int pos; int i; pos = NH_MESSAGE_BYTES - state->nh_remaining; bytes = min(srclen, state->nh_remaining); nh_fn(&key->nh_key[pos / 4], src, bytes, tmp_hash); for (i = 0; i < NH_NUM_PASSES; i++) le64_add_cpu(&state->nh_hash[i], le64_to_cpu(tmp_hash[i])); state->nh_remaining -= bytes; } if (state->nh_remaining == 0) process_nh_hash_value(state, key); src += bytes; srclen -= bytes; } while (srclen); } int crypto_nhpoly1305_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { struct nhpoly1305_key *ctx = crypto_shash_ctx(tfm); int i; if (keylen != NHPOLY1305_KEY_SIZE) return -EINVAL; poly1305_core_setkey(&ctx->poly_key, key); key += POLY1305_BLOCK_SIZE; for (i = 0; i < NH_KEY_WORDS; i++) ctx->nh_key[i] = get_unaligned_le32(key + i * sizeof(u32)); return 0; } EXPORT_SYMBOL(crypto_nhpoly1305_setkey); int crypto_nhpoly1305_init(struct shash_desc *desc) { struct nhpoly1305_state *state = shash_desc_ctx(desc); poly1305_core_init(&state->poly_state); state->buflen = 0; state->nh_remaining = 0; return 0; } EXPORT_SYMBOL(crypto_nhpoly1305_init); int crypto_nhpoly1305_update_helper(struct shash_desc *desc, const u8 *src, unsigned int srclen, nh_t nh_fn) { struct nhpoly1305_state *state = shash_desc_ctx(desc); const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm); unsigned int bytes; if (state->buflen) { bytes = min(srclen, (int)NH_MESSAGE_UNIT - state->buflen); memcpy(&state->buffer[state->buflen], src, bytes); state->buflen += bytes; if (state->buflen < NH_MESSAGE_UNIT) return 0; nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT, nh_fn); state->buflen = 0; src += bytes; srclen -= bytes; } if (srclen >= NH_MESSAGE_UNIT) { bytes = round_down(srclen, NH_MESSAGE_UNIT); nhpoly1305_units(state, key, src, bytes, nh_fn); src += bytes; srclen -= bytes; } if (srclen) { memcpy(state->buffer, src, srclen); state->buflen = srclen; } return 0; } EXPORT_SYMBOL(crypto_nhpoly1305_update_helper); int crypto_nhpoly1305_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { return crypto_nhpoly1305_update_helper(desc, src, srclen, nh_generic); } EXPORT_SYMBOL(crypto_nhpoly1305_update); int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, nh_t nh_fn) { struct nhpoly1305_state *state = shash_desc_ctx(desc); const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm); if (state->buflen) { memset(&state->buffer[state->buflen], 0, NH_MESSAGE_UNIT - state->buflen); nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT, nh_fn); } if (state->nh_remaining) process_nh_hash_value(state, key); poly1305_core_emit(&state->poly_state, NULL, dst); return 0; } EXPORT_SYMBOL(crypto_nhpoly1305_final_helper); int crypto_nhpoly1305_final(struct shash_desc *desc, u8 *dst) { return crypto_nhpoly1305_final_helper(desc, dst, nh_generic); } EXPORT_SYMBOL(crypto_nhpoly1305_final); static struct shash_alg nhpoly1305_alg = { .base.cra_name = "nhpoly1305", .base.cra_driver_name = "nhpoly1305-generic", .base.cra_priority = 100, .base.cra_ctxsize = sizeof(struct nhpoly1305_key), .base.cra_module = THIS_MODULE, 
.digestsize = POLY1305_DIGEST_SIZE, .init = crypto_nhpoly1305_init, .update = crypto_nhpoly1305_update, .final = crypto_nhpoly1305_final, .setkey = crypto_nhpoly1305_setkey, .descsize = sizeof(struct nhpoly1305_state), }; static int __init nhpoly1305_mod_init(void) { return crypto_register_shash(&nhpoly1305_alg); } static void __exit nhpoly1305_mod_exit(void) { crypto_unregister_shash(&nhpoly1305_alg); } module_init(nhpoly1305_mod_init); module_exit(nhpoly1305_mod_exit); MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); MODULE_ALIAS_CRYPTO("nhpoly1305"); MODULE_ALIAS_CRYPTO("nhpoly1305-generic"); |
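/*
 * Hedged illustration, not part of nhpoly1305.c above: a rough sketch of how
 * another kernel module might drive this hash through the shash API,
 * assuming the generic "nhpoly1305" algorithm is available (in practice it
 * is only used as a building block of Adiantum). The key layout follows
 * crypto_nhpoly1305_setkey(): a 16-byte Poly1305 key followed by the NH key
 * words, NHPOLY1305_KEY_SIZE bytes in total.
 */
#include <crypto/hash.h>
#include <crypto/nhpoly1305.h>
#include <crypto/poly1305.h>
#include <linux/err.h>

static int nhpoly1305_digest_demo(const u8 *key,	/* NHPOLY1305_KEY_SIZE bytes */
				  const u8 *msg, unsigned int len,
				  u8 out[POLY1305_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("nhpoly1305", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_setkey(tfm, key, NHPOLY1305_KEY_SIZE);
	if (!err) {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		err = crypto_shash_digest(desc, msg, len, out);
	}

	crypto_free_shash(tfm);
	return err;
}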
| 10 10 10 10 1 9 9 9 1 3 2 1 1 1 3 3 2 2 2 2 1 1 1 1 1 1 6 1 5 4 3 2 2 2 3 2 2 5 4 3 3 3 1 2 2 2 2 2 2 2 3 3 2 2 2 12 12 12 6 5 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 | // SPDX-License-Identifier: GPL-2.0-only /* * drivers/dma-buf/sync_file.c * * Copyright (C) 2012 Google, Inc. */ #include <linux/dma-fence-unwrap.h> #include <linux/export.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/poll.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/anon_inodes.h> #include <linux/sync_file.h> #include <uapi/linux/sync_file.h> static const struct file_operations sync_file_fops; static struct sync_file *sync_file_alloc(void) { struct sync_file *sync_file; sync_file = kzalloc(sizeof(*sync_file), GFP_KERNEL); if (!sync_file) return NULL; sync_file->file = anon_inode_getfile("sync_file", &sync_file_fops, sync_file, 0); if (IS_ERR(sync_file->file)) goto err; init_waitqueue_head(&sync_file->wq); INIT_LIST_HEAD(&sync_file->cb.node); return sync_file; err: kfree(sync_file); return NULL; } static void fence_check_cb_func(struct dma_fence *f, struct dma_fence_cb *cb) { struct sync_file *sync_file; sync_file = container_of(cb, struct sync_file, cb); wake_up_all(&sync_file->wq); } /** * sync_file_create() - creates a sync file * @fence: fence to add to the sync_fence * * Creates a sync_file containg @fence. This function acquires and additional * reference of @fence for the newly-created &sync_file, if it succeeds. The * sync_file can be released with fput(sync_file->file). Returns the * sync_file or NULL in case of error. 
*/ struct sync_file *sync_file_create(struct dma_fence *fence) { struct sync_file *sync_file; sync_file = sync_file_alloc(); if (!sync_file) return NULL; sync_file->fence = dma_fence_get(fence); return sync_file; } EXPORT_SYMBOL(sync_file_create); static struct sync_file *sync_file_fdget(int fd) { struct file *file = fget(fd); if (!file) return NULL; if (file->f_op != &sync_file_fops) goto err; return file->private_data; err: fput(file); return NULL; } /** * sync_file_get_fence - get the fence related to the sync_file fd * @fd: sync_file fd to get the fence from * * Ensures @fd references a valid sync_file and returns a fence that * represents all fence in the sync_file. On error NULL is returned. */ struct dma_fence *sync_file_get_fence(int fd) { struct sync_file *sync_file; struct dma_fence *fence; sync_file = sync_file_fdget(fd); if (!sync_file) return NULL; fence = dma_fence_get(sync_file->fence); fput(sync_file->file); return fence; } EXPORT_SYMBOL(sync_file_get_fence); /** * sync_file_get_name - get the name of the sync_file * @sync_file: sync_file to get the fence from * @buf: destination buffer to copy sync_file name into * @len: available size of destination buffer. * * Each sync_file may have a name assigned either by the user (when merging * sync_files together) or created from the fence it contains. In the latter * case construction of the name is deferred until use, and so requires * sync_file_get_name(). * * Returns: a string representing the name. */ char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len) { if (sync_file->user_name[0]) { strscpy(buf, sync_file->user_name, len); } else { struct dma_fence *fence = sync_file->fence; snprintf(buf, len, "%s-%s%llu-%lld", fence->ops->get_driver_name(fence), fence->ops->get_timeline_name(fence), fence->context, fence->seqno); } return buf; } /** * sync_file_merge() - merge two sync_files * @name: name of new fence * @a: sync_file a * @b: sync_file b * * Creates a new sync_file which contains copies of all the fences in both * @a and @b. @a and @b remain valid, independent sync_file. Returns the * new merged sync_file or NULL in case of error. */ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, struct sync_file *b) { struct sync_file *sync_file; struct dma_fence *fence; sync_file = sync_file_alloc(); if (!sync_file) return NULL; fence = dma_fence_unwrap_merge(a->fence, b->fence); if (!fence) { fput(sync_file->file); return NULL; } sync_file->fence = fence; strscpy(sync_file->user_name, name, sizeof(sync_file->user_name)); return sync_file; } static int sync_file_release(struct inode *inode, struct file *file) { struct sync_file *sync_file = file->private_data; if (test_bit(POLL_ENABLED, &sync_file->flags)) dma_fence_remove_callback(sync_file->fence, &sync_file->cb); dma_fence_put(sync_file->fence); kfree(sync_file); return 0; } static __poll_t sync_file_poll(struct file *file, poll_table *wait) { struct sync_file *sync_file = file->private_data; poll_wait(file, &sync_file->wq, wait); if (list_empty(&sync_file->cb.node) && !test_and_set_bit(POLL_ENABLED, &sync_file->flags)) { if (dma_fence_add_callback(sync_file->fence, &sync_file->cb, fence_check_cb_func) < 0) wake_up_all(&sync_file->wq); } return dma_fence_is_signaled(sync_file->fence) ? 
EPOLLIN : 0; } static long sync_file_ioctl_merge(struct sync_file *sync_file, unsigned long arg) { int fd = get_unused_fd_flags(O_CLOEXEC); int err; struct sync_file *fence2, *fence3; struct sync_merge_data data; if (fd < 0) return fd; if (copy_from_user(&data, (void __user *)arg, sizeof(data))) { err = -EFAULT; goto err_put_fd; } if (data.flags || data.pad) { err = -EINVAL; goto err_put_fd; } fence2 = sync_file_fdget(data.fd2); if (!fence2) { err = -ENOENT; goto err_put_fd; } data.name[sizeof(data.name) - 1] = '\0'; fence3 = sync_file_merge(data.name, sync_file, fence2); if (!fence3) { err = -ENOMEM; goto err_put_fence2; } data.fence = fd; if (copy_to_user((void __user *)arg, &data, sizeof(data))) { err = -EFAULT; goto err_put_fence3; } fd_install(fd, fence3->file); fput(fence2->file); return 0; err_put_fence3: fput(fence3->file); err_put_fence2: fput(fence2->file); err_put_fd: put_unused_fd(fd); return err; } static int sync_fill_fence_info(struct dma_fence *fence, struct sync_fence_info *info) { strscpy(info->obj_name, fence->ops->get_timeline_name(fence), sizeof(info->obj_name)); strscpy(info->driver_name, fence->ops->get_driver_name(fence), sizeof(info->driver_name)); info->status = dma_fence_get_status(fence); info->timestamp_ns = dma_fence_is_signaled(fence) ? ktime_to_ns(dma_fence_timestamp(fence)) : ktime_set(0, 0); return info->status; } static long sync_file_ioctl_fence_info(struct sync_file *sync_file, unsigned long arg) { struct sync_fence_info *fence_info = NULL; struct dma_fence_unwrap iter; struct sync_file_info info; unsigned int num_fences; struct dma_fence *fence; int ret; __u32 size; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) return -EFAULT; if (info.flags || info.pad) return -EINVAL; num_fences = 0; dma_fence_unwrap_for_each(fence, &iter, sync_file->fence) ++num_fences; /* * Passing num_fences = 0 means that userspace doesn't want to * retrieve any sync_fence_info. If num_fences = 0 we skip filling * sync_fence_info and return the actual number of fences on * info->num_fences. */ if (!info.num_fences) { info.status = dma_fence_get_status(sync_file->fence); goto no_fences; } else { info.status = 1; } if (info.num_fences < num_fences) return -EINVAL; size = num_fences * sizeof(*fence_info); fence_info = kzalloc(size, GFP_KERNEL); if (!fence_info) return -ENOMEM; num_fences = 0; dma_fence_unwrap_for_each(fence, &iter, sync_file->fence) { int status; status = sync_fill_fence_info(fence, &fence_info[num_fences++]); info.status = info.status <= 0 ? 
info.status : status; } if (copy_to_user(u64_to_user_ptr(info.sync_fence_info), fence_info, size)) { ret = -EFAULT; goto out; } no_fences: sync_file_get_name(sync_file, info.name, sizeof(info.name)); info.num_fences = num_fences; if (copy_to_user((void __user *)arg, &info, sizeof(info))) ret = -EFAULT; else ret = 0; out: kfree(fence_info); return ret; } static int sync_file_ioctl_set_deadline(struct sync_file *sync_file, unsigned long arg) { struct sync_set_deadline ts; if (copy_from_user(&ts, (void __user *)arg, sizeof(ts))) return -EFAULT; if (ts.pad) return -EINVAL; dma_fence_set_deadline(sync_file->fence, ns_to_ktime(ts.deadline_ns)); return 0; } static long sync_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct sync_file *sync_file = file->private_data; switch (cmd) { case SYNC_IOC_MERGE: return sync_file_ioctl_merge(sync_file, arg); case SYNC_IOC_FILE_INFO: return sync_file_ioctl_fence_info(sync_file, arg); case SYNC_IOC_SET_DEADLINE: return sync_file_ioctl_set_deadline(sync_file, arg); default: return -ENOTTY; } } static const struct file_operations sync_file_fops = { .release = sync_file_release, .poll = sync_file_poll, .unlocked_ioctl = sync_file_ioctl, .compat_ioctl = compat_ptr_ioctl, }; |
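/*
 * Hedged illustration, not part of sync_file.c above: a userspace sketch of
 * the SYNC_IOC_MERGE path handled by sync_file_ioctl_merge(). The two input
 * fds are assumed to come from something that exports sync_files (e.g. a DRM
 * out-fence); error handling is minimal.
 */
#include <linux/sync_file.h>
#include <string.h>
#include <sys/ioctl.h>

static int merge_sync_files(int fd_a, int fd_b)
{
	struct sync_merge_data data;

	memset(&data, 0, sizeof(data));
	strncpy(data.name, "merged", sizeof(data.name) - 1);
	data.fd2 = fd_b;	/* second sync_file to merge with fd_a */

	if (ioctl(fd_a, SYNC_IOC_MERGE, &data) < 0)
		return -1;

	/* data.fence is a new fd that signals once both inputs have signaled. */
	return data.fence;
}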
| 17 82 88 6 82 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NFNETLINK_H #define _NFNETLINK_H #include <linux/netlink.h> #include <linux/capability.h> #include <net/netlink.h> #include <uapi/linux/netfilter/nfnetlink.h> struct nfnl_info { struct net *net; struct sock *sk; const struct nlmsghdr *nlh; const struct nfgenmsg *nfmsg; struct netlink_ext_ack *extack; }; enum nfnl_callback_type { NFNL_CB_UNSPEC = 0, NFNL_CB_MUTEX, NFNL_CB_RCU, NFNL_CB_BATCH, }; struct nfnl_callback { int (*call)(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]); const struct nla_policy *policy; enum nfnl_callback_type type; __u16 attr_count; }; enum nfnl_abort_action { NFNL_ABORT_NONE = 0, NFNL_ABORT_AUTOLOAD, NFNL_ABORT_VALIDATE, }; struct nfnetlink_subsystem { const char *name; __u8 subsys_id; /* nfnetlink subsystem ID */ __u8 cb_count; /* number of callbacks */ const struct nfnl_callback *cb; /* callback for individual types */ struct module *owner; int (*commit)(struct net *net, struct sk_buff *skb); int (*abort)(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action); bool (*valid_genid)(struct net *net, u32 genid); }; int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); int nfnetlink_has_listeners(struct net *net, unsigned int group); int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags); int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid); void nfnetlink_broadcast(struct net *net, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type) { return subsys << 8 | msg_type; } static inline void nfnl_fill_hdr(struct nlmsghdr *nlh, u8 family, u8 version, __be16 res_id) { struct nfgenmsg *nfmsg; nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = family; nfmsg->version = version; nfmsg->res_id = res_id; } static inline struct nlmsghdr *nfnl_msg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int flags, u8 family, u8 version, __be16 res_id) { struct nlmsghdr *nlh; nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); if (!nlh) return NULL; nfnl_fill_hdr(nlh, family, version, res_id); return nlh; } void nfnl_lock(__u8 subsys_id); void nfnl_unlock(__u8 subsys_id); #ifdef CONFIG_PROVE_LOCKING bool lockdep_nfnl_is_held(__u8 subsys_id); #else static inline bool lockdep_nfnl_is_held(__u8 subsys_id) { return true; } #endif /* CONFIG_PROVE_LOCKING */ #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) #endif /* _NFNETLINK_H */ |
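/*
 * Hedged illustration, not part of nfnetlink.h above: a rough sketch of what
 * a subsystem hands to nfnetlink_subsys_register(). The "demo" subsystem,
 * its message type and the callback body are hypothetical; real users
 * (nf_tables, nfnetlink_log, ...) use their own NFNL_SUBSYS_* IDs and
 * nla_policy tables.
 */
#include <linux/module.h>
#include <linux/netfilter/nfnetlink.h>

enum {
	DEMO_MSG_GET,		/* hypothetical message type */
	DEMO_MSG_MAX
};

static int demo_get(struct sk_buff *skb, const struct nfnl_info *info,
		    const struct nlattr * const cda[])
{
	/* info->net, info->nlh and friends arrive prefilled by nfnetlink. */
	return 0;
}

static const struct nfnl_callback demo_cb[DEMO_MSG_MAX] = {
	[DEMO_MSG_GET] = {
		.call		= demo_get,
		.type		= NFNL_CB_MUTEX,	/* dispatched under nfnl_lock() */
		.attr_count	= 0,
	},
};

static const struct nfnetlink_subsystem demo_subsys = {
	.name		= "demo",
	.subsys_id	= NFNL_SUBSYS_NONE,	/* placeholder; a real subsystem has its own ID */
	.cb_count	= DEMO_MSG_MAX,
	.cb		= demo_cb,
	.owner		= THIS_MODULE,
};

/* A module would then call nfnetlink_subsys_register(&demo_subsys) from its init path. */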
| 52 52 51 5 3 5 6 3 6 12 14 14 14 2 2 2 2 33 1 32 32 5 2 5 1 1 1 1 1 3 3 29 30 15 15 29 29 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 | // SPDX-License-Identifier: GPL-2.0-or-later /* * OSS compatible sequencer driver * * seq_oss_readq.c - MIDI input queue * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de> */ #include "seq_oss_readq.h" #include "seq_oss_event.h" #include <sound/seq_oss_legacy.h> #include "../seq_lock.h" #include <linux/wait.h> #include <linux/slab.h> /* * constants */ //#define SNDRV_SEQ_OSS_MAX_TIMEOUT (unsigned long)(-1) #define SNDRV_SEQ_OSS_MAX_TIMEOUT (HZ * 3600) /* * prototypes */ /* * create a read queue */ struct seq_oss_readq * snd_seq_oss_readq_new(struct seq_oss_devinfo *dp, int maxlen) { struct seq_oss_readq *q; q = kzalloc(sizeof(*q), GFP_KERNEL); if (!q) return NULL; q->q = kcalloc(maxlen, sizeof(union evrec), GFP_KERNEL); if (!q->q) { kfree(q); return NULL; } q->maxlen = maxlen; q->qlen = 0; q->head = q->tail = 0; init_waitqueue_head(&q->midi_sleep); spin_lock_init(&q->lock); q->pre_event_timeout = SNDRV_SEQ_OSS_MAX_TIMEOUT; q->input_time = (unsigned long)-1; return q; } /* * delete the read queue */ void snd_seq_oss_readq_delete(struct seq_oss_readq *q) { if (q) { kfree(q->q); kfree(q); } } /* * reset the read queue */ void snd_seq_oss_readq_clear(struct seq_oss_readq *q) { if (q->qlen) { q->qlen = 0; q->head = q->tail = 0; } /* if someone sleeping, wake'em up */ wake_up(&q->midi_sleep); q->input_time = (unsigned long)-1; } /* * put a midi byte */ int snd_seq_oss_readq_puts(struct seq_oss_readq *q, int dev, unsigned char *data, int len) { union evrec rec; int result; memset(&rec, 0, sizeof(rec)); rec.c[0] = SEQ_MIDIPUTC; rec.c[2] = dev; while (len-- > 0) { rec.c[1] = *data++; result = snd_seq_oss_readq_put_event(q, &rec); if (result < 0) return result; } return 0; } /* * put MIDI sysex bytes; the event buffer may be chained, thus it has * to be expanded via snd_seq_dump_var_event(). 
*/ struct readq_sysex_ctx { struct seq_oss_readq *readq; int dev; }; static int readq_dump_sysex(void *ptr, void *buf, int count) { struct readq_sysex_ctx *ctx = ptr; return snd_seq_oss_readq_puts(ctx->readq, ctx->dev, buf, count); } int snd_seq_oss_readq_sysex(struct seq_oss_readq *q, int dev, struct snd_seq_event *ev) { struct readq_sysex_ctx ctx = { .readq = q, .dev = dev }; if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) return 0; return snd_seq_dump_var_event(ev, readq_dump_sysex, &ctx); } /* * copy an event to input queue: * return zero if enqueued */ int snd_seq_oss_readq_put_event(struct seq_oss_readq *q, union evrec *ev) { unsigned long flags; spin_lock_irqsave(&q->lock, flags); if (q->qlen >= q->maxlen - 1) { spin_unlock_irqrestore(&q->lock, flags); return -ENOMEM; } memcpy(&q->q[q->tail], ev, sizeof(*ev)); q->tail = (q->tail + 1) % q->maxlen; q->qlen++; /* wake up sleeper */ wake_up(&q->midi_sleep); spin_unlock_irqrestore(&q->lock, flags); return 0; } /* * pop queue * caller must hold lock */ int snd_seq_oss_readq_pick(struct seq_oss_readq *q, union evrec *rec) { if (q->qlen == 0) return -EAGAIN; memcpy(rec, &q->q[q->head], sizeof(*rec)); return 0; } /* * sleep until ready */ void snd_seq_oss_readq_wait(struct seq_oss_readq *q) { wait_event_interruptible_timeout(q->midi_sleep, (q->qlen > 0 || q->head == q->tail), q->pre_event_timeout); } /* * drain one record * caller must hold lock */ void snd_seq_oss_readq_free(struct seq_oss_readq *q) { if (q->qlen > 0) { q->head = (q->head + 1) % q->maxlen; q->qlen--; } } /* * polling/select: * return non-zero if readq is not empty. */ unsigned int snd_seq_oss_readq_poll(struct seq_oss_readq *q, struct file *file, poll_table *wait) { poll_wait(file, &q->midi_sleep, wait); return q->qlen; } /* * put a timestamp */ int snd_seq_oss_readq_put_timestamp(struct seq_oss_readq *q, unsigned long curt, int seq_mode) { if (curt != q->input_time) { union evrec rec; memset(&rec, 0, sizeof(rec)); switch (seq_mode) { case SNDRV_SEQ_OSS_MODE_SYNTH: rec.echo = (curt << 8) | SEQ_WAIT; snd_seq_oss_readq_put_event(q, &rec); break; case SNDRV_SEQ_OSS_MODE_MUSIC: rec.t.code = EV_TIMING; rec.t.cmd = TMR_WAIT_ABS; rec.t.time = curt; snd_seq_oss_readq_put_event(q, &rec); break; } q->input_time = curt; } return 0; } #ifdef CONFIG_SND_PROC_FS /* * proc interface */ void snd_seq_oss_readq_info_read(struct seq_oss_readq *q, struct snd_info_buffer *buf) { snd_iprintf(buf, " read queue [%s] length = %d : tick = %ld\n", (waitqueue_active(&q->midi_sleep) ? "sleeping":"running"), q->qlen, q->input_time); } #endif /* CONFIG_SND_PROC_FS */ |
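/*
 * Hedged illustration, not part of seq_oss_readq.c above: the read-side
 * pattern this queue is built for, loosely modelled on the OSS sequencer
 * read path. The helper name is made up, and locking/signal handling is
 * simplified compared to the real consumer.
 */
#include <linux/spinlock.h>
#include "seq_oss_readq.h"
#include "seq_oss_event.h"

static int demo_read_one_event(struct seq_oss_readq *q, union evrec *out)
{
	unsigned long flags;
	int err;

	/* Sleep (with timeout) until the producer has queued something. */
	if (q->qlen == 0)
		snd_seq_oss_readq_wait(q);

	spin_lock_irqsave(&q->lock, flags);
	err = snd_seq_oss_readq_pick(q, out);	/* copy the head record */
	if (!err)
		snd_seq_oss_readq_free(q);	/* then drop it from the queue */
	spin_unlock_irqrestore(&q->lock, flags);

	return err;	/* -EAGAIN if the queue was still empty */
}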
| 1 1 1 3 2 1 1 2 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 | /* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. 
* * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #include <linux/uaccess.h> #include <drm/drm_atomic.h> #include <drm/drm_color_mgmt.h> #include <drm/drm_crtc.h> #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_print.h> #include "drm_crtc_internal.h" /** * DOC: overview * * Color management or color space adjustments is supported through a set of 5 * properties on the &drm_crtc object. They are set up by calling * drm_crtc_enable_color_mgmt(). * * "DEGAMMA_LUT”: * Blob property to set the degamma lookup table (LUT) mapping pixel data * from the framebuffer before it is given to the transformation matrix. * The data is interpreted as an array of &struct drm_color_lut elements. * Hardware might choose not to use the full precision of the LUT elements * nor use all the elements of the LUT (for example the hardware might * choose to interpolate between LUT[0] and LUT[4]). * * Setting this to NULL (blob property value set to 0) means a * linear/pass-thru gamma table should be used. This is generally the * driver boot-up state too. Drivers can access this blob through * &drm_crtc_state.degamma_lut. * * “DEGAMMA_LUT_SIZE”: * Unsinged range property to give the size of the lookup table to be set * on the DEGAMMA_LUT property (the size depends on the underlying * hardware). If drivers support multiple LUT sizes then they should * publish the largest size, and sub-sample smaller sized LUTs (e.g. for * split-gamma modes) appropriately. * * “CTM”: * Blob property to set the current transformation matrix (CTM) apply to * pixel data after the lookup through the degamma LUT and before the * lookup through the gamma LUT. The data is interpreted as a struct * &drm_color_ctm. * * Setting this to NULL (blob property value set to 0) means a * unit/pass-thru matrix should be used. This is generally the driver * boot-up state too. Drivers can access the blob for the color conversion * matrix through &drm_crtc_state.ctm. * * “GAMMA_LUT”: * Blob property to set the gamma lookup table (LUT) mapping pixel data * after the transformation matrix to data sent to the connector. The * data is interpreted as an array of &struct drm_color_lut elements. * Hardware might choose not to use the full precision of the LUT elements * nor use all the elements of the LUT (for example the hardware might * choose to interpolate between LUT[0] and LUT[4]). * * Setting this to NULL (blob property value set to 0) means a * linear/pass-thru gamma table should be used. This is generally the * driver boot-up state too. Drivers can access this blob through * &drm_crtc_state.gamma_lut. * * Note that for mostly historical reasons stemming from Xorg heritage, * this is also used to store the color map (also sometimes color lut, CLUT * or color palette) for indexed formats like DRM_FORMAT_C8. * * “GAMMA_LUT_SIZE”: * Unsigned range property to give the size of the lookup table to be set * on the GAMMA_LUT property (the size depends on the underlying hardware). * If drivers support multiple LUT sizes then they should publish the * largest size, and sub-sample smaller sized LUTs (e.g. 
for split-gamma * modes) appropriately. * * There is also support for a legacy gamma table, which is set up by calling * drm_mode_crtc_set_gamma_size(). The DRM core will then alias the legacy gamma * ramp with "GAMMA_LUT" or, if that is unavailable, "DEGAMMA_LUT". * * Support for different non RGB color encodings is controlled through * &drm_plane specific COLOR_ENCODING and COLOR_RANGE properties. They * are set up by calling drm_plane_create_color_properties(). * * "COLOR_ENCODING": * Optional plane enum property to support different non RGB * color encodings. The driver can provide a subset of standard * enum values supported by the DRM plane. * * "COLOR_RANGE": * Optional plane enum property to support different non RGB * color parameter ranges. The driver can provide a subset of * standard enum values supported by the DRM plane. */ /** * drm_color_ctm_s31_32_to_qm_n * * @user_input: input value * @m: number of integer bits, only support m <= 32, include the sign-bit * @n: number of fractional bits, only support n <= 32 * * Convert and clamp S31.32 sign-magnitude to Qm.n (signed 2's complement). * The sign-bit BIT(m+n-1) and above are 0 for positive value and 1 for negative * the range of value is [-2^(m-1), 2^(m-1) - 2^-n] * * For example * A Q3.12 format number: * - required bit: 3 + 12 = 15bits * - range: [-2^2, 2^2 - 2^−15] * * NOTE: the m can be zero if all bit_precision are used to present fractional * bits like Q0.32 */ u64 drm_color_ctm_s31_32_to_qm_n(u64 user_input, u32 m, u32 n) { u64 mag = (user_input & ~BIT_ULL(63)) >> (32 - n); bool negative = !!(user_input & BIT_ULL(63)); s64 val; WARN_ON(m > 32 || n > 32); val = clamp_val(mag, 0, negative ? BIT_ULL(n + m - 1) : BIT_ULL(n + m - 1) - 1); return negative ? -val : val; } EXPORT_SYMBOL(drm_color_ctm_s31_32_to_qm_n); /** * drm_crtc_enable_color_mgmt - enable color management properties * @crtc: DRM CRTC * @degamma_lut_size: the size of the degamma lut (before CSC) * @has_ctm: whether to attach ctm_property for CSC matrix * @gamma_lut_size: the size of the gamma lut (after CSC) * * This function lets the driver enable the color correction * properties on a CRTC. This includes 3 degamma, csc and gamma * properties that userspace can set and 2 size properties to inform * the userspace of the lut sizes. Each of the properties are * optional. The gamma and degamma properties are only attached if * their size is not 0 and ctm_property is only attached if has_ctm is * true. */ void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size, bool has_ctm, uint gamma_lut_size) { struct drm_device *dev = crtc->dev; struct drm_mode_config *config = &dev->mode_config; if (degamma_lut_size) { drm_object_attach_property(&crtc->base, config->degamma_lut_property, 0); drm_object_attach_property(&crtc->base, config->degamma_lut_size_property, degamma_lut_size); } if (has_ctm) drm_object_attach_property(&crtc->base, config->ctm_property, 0); if (gamma_lut_size) { drm_object_attach_property(&crtc->base, config->gamma_lut_property, 0); drm_object_attach_property(&crtc->base, config->gamma_lut_size_property, gamma_lut_size); } } EXPORT_SYMBOL(drm_crtc_enable_color_mgmt); /** * drm_mode_crtc_set_gamma_size - set the gamma table size * @crtc: CRTC to set the gamma table size for * @gamma_size: size of the gamma table * * Drivers which support gamma tables should set this to the supported gamma * table size when initializing the CRTC. Currently the drm core only supports a * fixed gamma table size. 
* * Returns: * Zero on success, negative errno on failure. */ int drm_mode_crtc_set_gamma_size(struct drm_crtc *crtc, int gamma_size) { uint16_t *r_base, *g_base, *b_base; int i; crtc->gamma_size = gamma_size; crtc->gamma_store = kcalloc(gamma_size, sizeof(uint16_t) * 3, GFP_KERNEL); if (!crtc->gamma_store) { crtc->gamma_size = 0; return -ENOMEM; } r_base = crtc->gamma_store; g_base = r_base + gamma_size; b_base = g_base + gamma_size; for (i = 0; i < gamma_size; i++) { r_base[i] = i << 8; g_base[i] = i << 8; b_base[i] = i << 8; } return 0; } EXPORT_SYMBOL(drm_mode_crtc_set_gamma_size); /** * drm_crtc_supports_legacy_gamma - does the crtc support legacy gamma correction table * @crtc: CRTC object * * Returns true/false if the given crtc supports setting the legacy gamma * correction table. */ static bool drm_crtc_supports_legacy_gamma(struct drm_crtc *crtc) { u32 gamma_id = crtc->dev->mode_config.gamma_lut_property->base.id; u32 degamma_id = crtc->dev->mode_config.degamma_lut_property->base.id; if (!crtc->gamma_size) return false; if (crtc->funcs->gamma_set) return true; return !!(drm_mode_obj_find_prop_id(&crtc->base, gamma_id) || drm_mode_obj_find_prop_id(&crtc->base, degamma_id)); } /** * drm_crtc_legacy_gamma_set - set the legacy gamma correction table * @crtc: CRTC object * @red: red correction table * @green: green correction table * @blue: blue correction table * @size: size of the tables * @ctx: lock acquire context * * Implements support for legacy gamma correction table for drivers * that have set drm_crtc_funcs.gamma_set or that support color management * through the DEGAMMA_LUT/GAMMA_LUT properties. See * drm_crtc_enable_color_mgmt() and the containing chapter for * how the atomic color management and gamma tables work. * * This function sets the gamma using drm_crtc_funcs.gamma_set if set, or * alternatively using crtc color management properties. */ static int drm_crtc_legacy_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, u32 size, struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev = crtc->dev; struct drm_atomic_state *state; struct drm_crtc_state *crtc_state; struct drm_property_blob *blob; struct drm_color_lut *blob_data; u32 gamma_id = dev->mode_config.gamma_lut_property->base.id; u32 degamma_id = dev->mode_config.degamma_lut_property->base.id; bool use_gamma_lut; int i, ret = 0; bool replaced; if (crtc->funcs->gamma_set) return crtc->funcs->gamma_set(crtc, red, green, blue, size, ctx); if (drm_mode_obj_find_prop_id(&crtc->base, gamma_id)) use_gamma_lut = true; else if (drm_mode_obj_find_prop_id(&crtc->base, degamma_id)) use_gamma_lut = false; else return -ENODEV; state = drm_atomic_state_alloc(crtc->dev); if (!state) return -ENOMEM; blob = drm_property_create_blob(dev, sizeof(struct drm_color_lut) * size, NULL); if (IS_ERR(blob)) { ret = PTR_ERR(blob); blob = NULL; goto fail; } /* Prepare GAMMA_LUT with the legacy values. */ blob_data = blob->data; for (i = 0; i < size; i++) { blob_data[i].red = red[i]; blob_data[i].green = green[i]; blob_data[i].blue = blue[i]; } state->acquire_ctx = ctx; crtc_state = drm_atomic_get_crtc_state(state, crtc); if (IS_ERR(crtc_state)) { ret = PTR_ERR(crtc_state); goto fail; } /* Set GAMMA_LUT and reset DEGAMMA_LUT and CTM */ replaced = drm_property_replace_blob(&crtc_state->degamma_lut, use_gamma_lut ? NULL : blob); replaced |= drm_property_replace_blob(&crtc_state->ctm, NULL); replaced |= drm_property_replace_blob(&crtc_state->gamma_lut, use_gamma_lut ? 
blob : NULL); crtc_state->color_mgmt_changed |= replaced; ret = drm_atomic_commit(state); fail: drm_atomic_state_put(state); drm_property_blob_put(blob); return ret; } /** * drm_mode_gamma_set_ioctl - set the gamma table * @dev: DRM device * @data: ioctl data * @file_priv: DRM file info * * Set the gamma table of a CRTC to the one passed in by the user. Userspace can * inquire the required gamma table size through drm_mode_gamma_get_ioctl. * * Called by the user via ioctl. * * Returns: * Zero on success, negative errno on failure. */ int drm_mode_gamma_set_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_crtc_lut *crtc_lut = data; struct drm_crtc *crtc; void *r_base, *g_base, *b_base; int size; struct drm_modeset_acquire_ctx ctx; int ret = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; crtc = drm_crtc_find(dev, file_priv, crtc_lut->crtc_id); if (!crtc) return -ENOENT; if (!drm_crtc_supports_legacy_gamma(crtc)) return -ENOSYS; /* memcpy into gamma store */ if (crtc_lut->gamma_size != crtc->gamma_size) return -EINVAL; DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret); size = crtc_lut->gamma_size * (sizeof(uint16_t)); r_base = crtc->gamma_store; if (copy_from_user(r_base, (void __user *)(unsigned long)crtc_lut->red, size)) { ret = -EFAULT; goto out; } g_base = r_base + size; if (copy_from_user(g_base, (void __user *)(unsigned long)crtc_lut->green, size)) { ret = -EFAULT; goto out; } b_base = g_base + size; if (copy_from_user(b_base, (void __user *)(unsigned long)crtc_lut->blue, size)) { ret = -EFAULT; goto out; } ret = drm_crtc_legacy_gamma_set(crtc, r_base, g_base, b_base, crtc->gamma_size, &ctx); out: DRM_MODESET_LOCK_ALL_END(dev, ctx, ret); return ret; } /** * drm_mode_gamma_get_ioctl - get the gamma table * @dev: DRM device * @data: ioctl data * @file_priv: DRM file info * * Copy the current gamma table into the storage provided. This also provides * the gamma table size the driver expects, which can be used to size the * allocated storage. * * Called by the user via ioctl. * * Returns: * Zero on success, negative errno on failure. 
*/ int drm_mode_gamma_get_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_crtc_lut *crtc_lut = data; struct drm_crtc *crtc; void *r_base, *g_base, *b_base; int size; int ret = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; crtc = drm_crtc_find(dev, file_priv, crtc_lut->crtc_id); if (!crtc) return -ENOENT; /* memcpy into gamma store */ if (crtc_lut->gamma_size != crtc->gamma_size) return -EINVAL; drm_modeset_lock(&crtc->mutex, NULL); size = crtc_lut->gamma_size * (sizeof(uint16_t)); r_base = crtc->gamma_store; if (copy_to_user((void __user *)(unsigned long)crtc_lut->red, r_base, size)) { ret = -EFAULT; goto out; } g_base = r_base + size; if (copy_to_user((void __user *)(unsigned long)crtc_lut->green, g_base, size)) { ret = -EFAULT; goto out; } b_base = g_base + size; if (copy_to_user((void __user *)(unsigned long)crtc_lut->blue, b_base, size)) { ret = -EFAULT; goto out; } out: drm_modeset_unlock(&crtc->mutex); return ret; } static const char * const color_encoding_name[] = { [DRM_COLOR_YCBCR_BT601] = "ITU-R BT.601 YCbCr", [DRM_COLOR_YCBCR_BT709] = "ITU-R BT.709 YCbCr", [DRM_COLOR_YCBCR_BT2020] = "ITU-R BT.2020 YCbCr", }; static const char * const color_range_name[] = { [DRM_COLOR_YCBCR_FULL_RANGE] = "YCbCr full range", [DRM_COLOR_YCBCR_LIMITED_RANGE] = "YCbCr limited range", }; /** * drm_get_color_encoding_name - return a string for color encoding * @encoding: color encoding to compute name of * * In contrast to the other drm_get_*_name functions this one here returns a * const pointer and hence is threadsafe. */ const char *drm_get_color_encoding_name(enum drm_color_encoding encoding) { if (WARN_ON(encoding >= ARRAY_SIZE(color_encoding_name))) return "unknown"; return color_encoding_name[encoding]; } /** * drm_get_color_range_name - return a string for color range * @range: color range to compute name of * * In contrast to the other drm_get_*_name functions this one here returns a * const pointer and hence is threadsafe. */ const char *drm_get_color_range_name(enum drm_color_range range) { if (WARN_ON(range >= ARRAY_SIZE(color_range_name))) return "unknown"; return color_range_name[range]; } /** * drm_plane_create_color_properties - color encoding related plane properties * @plane: plane object * @supported_encodings: bitfield indicating supported color encodings * @supported_ranges: bitfileld indicating supported color ranges * @default_encoding: default color encoding * @default_range: default color range * * Create and attach plane specific COLOR_ENCODING and COLOR_RANGE * properties to @plane. The supported encodings and ranges should * be provided in supported_encodings and supported_ranges bitmasks. * Each bit set in the bitmask indicates that its number as enum * value is supported. 
*/ int drm_plane_create_color_properties(struct drm_plane *plane, u32 supported_encodings, u32 supported_ranges, enum drm_color_encoding default_encoding, enum drm_color_range default_range) { struct drm_device *dev = plane->dev; struct drm_property *prop; struct drm_prop_enum_list enum_list[MAX_T(int, DRM_COLOR_ENCODING_MAX, DRM_COLOR_RANGE_MAX)]; int i, len; if (WARN_ON(supported_encodings == 0 || (supported_encodings & -BIT(DRM_COLOR_ENCODING_MAX)) != 0 || (supported_encodings & BIT(default_encoding)) == 0)) return -EINVAL; if (WARN_ON(supported_ranges == 0 || (supported_ranges & -BIT(DRM_COLOR_RANGE_MAX)) != 0 || (supported_ranges & BIT(default_range)) == 0)) return -EINVAL; len = 0; for (i = 0; i < DRM_COLOR_ENCODING_MAX; i++) { if ((supported_encodings & BIT(i)) == 0) continue; enum_list[len].type = i; enum_list[len].name = color_encoding_name[i]; len++; } prop = drm_property_create_enum(dev, 0, "COLOR_ENCODING", enum_list, len); if (!prop) return -ENOMEM; plane->color_encoding_property = prop; drm_object_attach_property(&plane->base, prop, default_encoding); if (plane->state) plane->state->color_encoding = default_encoding; len = 0; for (i = 0; i < DRM_COLOR_RANGE_MAX; i++) { if ((supported_ranges & BIT(i)) == 0) continue; enum_list[len].type = i; enum_list[len].name = color_range_name[i]; len++; } prop = drm_property_create_enum(dev, 0, "COLOR_RANGE", enum_list, len); if (!prop) return -ENOMEM; plane->color_range_property = prop; drm_object_attach_property(&plane->base, prop, default_range); if (plane->state) plane->state->color_range = default_range; return 0; } EXPORT_SYMBOL(drm_plane_create_color_properties); /** * drm_color_lut_check - check validity of lookup table * @lut: property blob containing LUT to check * @tests: bitmask of tests to run * * Helper to check whether a userspace-provided lookup table is valid and * satisfies hardware requirements. Drivers pass a bitmask indicating which of * the tests in &drm_color_lut_tests should be performed. * * Returns 0 on success, -EINVAL on failure. */ int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests) { const struct drm_color_lut *entry; int i; if (!lut || !tests) return 0; entry = lut->data; for (i = 0; i < drm_color_lut_size(lut); i++) { if (tests & DRM_COLOR_LUT_EQUAL_CHANNELS) { if (entry[i].red != entry[i].blue || entry[i].red != entry[i].green) { DRM_DEBUG_KMS("All LUT entries must have equal r/g/b\n"); return -EINVAL; } } if (i > 0 && tests & DRM_COLOR_LUT_NON_DECREASING) { if (entry[i].red < entry[i - 1].red || entry[i].green < entry[i - 1].green || entry[i].blue < entry[i - 1].blue) { DRM_DEBUG_KMS("LUT entries must never decrease.\n"); return -EINVAL; } } } return 0; } EXPORT_SYMBOL(drm_color_lut_check); |
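/*
 * Hedged illustration, not part of drm_color_mgmt.c above: what a KMS driver
 * typically does at CRTC init time to expose these properties. The sizes
 * (256-entry legacy gamma ramp, 1024-entry LUTs) are made up for the
 * example; the hardware dictates the real values.
 */
#include <drm/drm_color_mgmt.h>
#include <drm/drm_crtc.h>

static int demo_crtc_color_init(struct drm_crtc *crtc)
{
	int ret;

	/* Legacy gamma ramp; also backs the GAMMA_LUT alias for old userspace. */
	ret = drm_mode_crtc_set_gamma_size(crtc, 256);
	if (ret)
		return ret;

	/* Attach DEGAMMA_LUT(_SIZE), CTM and GAMMA_LUT(_SIZE). */
	drm_crtc_enable_color_mgmt(crtc, 1024, true, 1024);

	return 0;
}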
// SPDX-License-Identifier: GPL-2.0 /* * virtio-fs: Virtio Filesystem * Copyright (C) 2018 Red Hat, Inc. */ #include <linux/fs.h> #include <linux/dax.h> #include <linux/pci.h> #include <linux/interrupt.h> #include <linux/group_cpus.h> #include <linux/pfn_t.h> #include <linux/memremap.h> #include <linux/module.h> #include <linux/virtio.h> #include <linux/virtio_fs.h> #include <linux/delay.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/highmem.h> #include <linux/cleanup.h> #include <linux/uio.h> #include "fuse_i.h" /* Used to help calculate the FUSE connection's max_pages limit for a request's * size. Parts of the struct fuse_req are sliced into scattergather lists in * addition to the pages used, so this can help account for that overhead. */ #define FUSE_HEADER_OVERHEAD 4 /* List of virtio-fs device instances and a lock for the list. 
Also provides * mutual exclusion in device removal and mounting path */ static DEFINE_MUTEX(virtio_fs_mutex); static LIST_HEAD(virtio_fs_instances); /* The /sys/fs/virtio_fs/ kset */ static struct kset *virtio_fs_kset; enum { VQ_HIPRIO, VQ_REQUEST }; #define VQ_NAME_LEN 24 /* Per-virtqueue state */ struct virtio_fs_vq { spinlock_t lock; struct virtqueue *vq; /* protected by ->lock */ struct work_struct done_work; struct list_head queued_reqs; struct list_head end_reqs; /* End these requests */ struct work_struct dispatch_work; struct fuse_dev *fud; bool connected; long in_flight; struct completion in_flight_zero; /* No inflight requests */ struct kobject *kobj; char name[VQ_NAME_LEN]; } ____cacheline_aligned_in_smp; /* A virtio-fs device instance */ struct virtio_fs { struct kobject kobj; struct kobject *mqs_kobj; struct list_head list; /* on virtio_fs_instances */ char *tag; struct virtio_fs_vq *vqs; unsigned int nvqs; /* number of virtqueues */ unsigned int num_request_queues; /* number of request queues */ struct dax_device *dax_dev; unsigned int *mq_map; /* index = cpu id, value = request vq id */ /* DAX memory window where file contents are mapped */ void *window_kaddr; phys_addr_t window_phys_addr; size_t window_len; }; struct virtio_fs_forget_req { struct fuse_in_header ih; struct fuse_forget_in arg; }; struct virtio_fs_forget { /* This request can be temporarily queued on virt queue */ struct list_head list; struct virtio_fs_forget_req req; }; struct virtio_fs_req_work { struct fuse_req *req; struct virtio_fs_vq *fsvq; struct work_struct done_work; }; static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, struct fuse_req *req, bool in_flight, gfp_t gfp); static const struct constant_table dax_param_enums[] = { {"always", FUSE_DAX_ALWAYS }, {"never", FUSE_DAX_NEVER }, {"inode", FUSE_DAX_INODE_USER }, {} }; enum { OPT_DAX, OPT_DAX_ENUM, }; static const struct fs_parameter_spec virtio_fs_parameters[] = { fsparam_flag("dax", OPT_DAX), fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums), {} }; static int virtio_fs_parse_param(struct fs_context *fsc, struct fs_parameter *param) { struct fs_parse_result result; struct fuse_fs_context *ctx = fsc->fs_private; int opt; opt = fs_parse(fsc, virtio_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case OPT_DAX: ctx->dax_mode = FUSE_DAX_ALWAYS; break; case OPT_DAX_ENUM: ctx->dax_mode = result.uint_32; break; default: return -EINVAL; } return 0; } static void virtio_fs_free_fsc(struct fs_context *fsc) { struct fuse_fs_context *ctx = fsc->fs_private; kfree(ctx); } static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) { struct virtio_fs *fs = vq->vdev->priv; return &fs->vqs[vq->index]; } /* Should be called with fsvq->lock held. */ static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) { fsvq->in_flight++; } /* Should be called with fsvq->lock held. 
*/ static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq) { WARN_ON(fsvq->in_flight <= 0); fsvq->in_flight--; if (!fsvq->in_flight) complete(&fsvq->in_flight_zero); } static ssize_t tag_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj); return sysfs_emit(buf, "%s\n", fs->tag); } static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag); static struct attribute *virtio_fs_attrs[] = { &virtio_fs_tag_attr.attr, NULL }; ATTRIBUTE_GROUPS(virtio_fs); static void virtio_fs_ktype_release(struct kobject *kobj) { struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj); kfree(vfs->mq_map); kfree(vfs->vqs); kfree(vfs); } static const struct kobj_type virtio_fs_ktype = { .release = virtio_fs_ktype_release, .sysfs_ops = &kobj_sysfs_ops, .default_groups = virtio_fs_groups, }; static struct virtio_fs_vq *virtio_fs_kobj_to_vq(struct virtio_fs *fs, struct kobject *kobj) { int i; for (i = 0; i < fs->nvqs; i++) { if (kobj == fs->vqs[i].kobj) return &fs->vqs[i]; } return NULL; } static ssize_t name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj); struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj); if (!fsvq) return -EINVAL; return sysfs_emit(buf, "%s\n", fsvq->name); } static struct kobj_attribute virtio_fs_vq_name_attr = __ATTR_RO(name); static ssize_t cpu_list_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj); struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj); unsigned int cpu, qid; const size_t size = PAGE_SIZE - 1; bool first = true; int ret = 0, pos = 0; if (!fsvq) return -EINVAL; qid = fsvq->vq->index; for (cpu = 0; cpu < nr_cpu_ids; cpu++) { if (qid < VQ_REQUEST || (fs->mq_map[cpu] == qid)) { if (first) ret = snprintf(buf + pos, size - pos, "%u", cpu); else ret = snprintf(buf + pos, size - pos, ", %u", cpu); if (ret >= size - pos) break; first = false; pos += ret; } } ret = snprintf(buf + pos, size + 1 - pos, "\n"); return pos + ret; } static struct kobj_attribute virtio_fs_vq_cpu_list_attr = __ATTR_RO(cpu_list); static struct attribute *virtio_fs_vq_attrs[] = { &virtio_fs_vq_name_attr.attr, &virtio_fs_vq_cpu_list_attr.attr, NULL }; static struct attribute_group virtio_fs_vq_attr_group = { .attrs = virtio_fs_vq_attrs, }; /* Make sure virtiofs_mutex is held */ static void virtio_fs_put_locked(struct virtio_fs *fs) { lockdep_assert_held(&virtio_fs_mutex); kobject_put(&fs->kobj); } static void virtio_fs_put(struct virtio_fs *fs) { mutex_lock(&virtio_fs_mutex); virtio_fs_put_locked(fs); mutex_unlock(&virtio_fs_mutex); } static void virtio_fs_fiq_release(struct fuse_iqueue *fiq) { struct virtio_fs *vfs = fiq->priv; virtio_fs_put(vfs); } static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) { WARN_ON(fsvq->in_flight < 0); /* Wait for in flight requests to finish.*/ spin_lock(&fsvq->lock); if (fsvq->in_flight) { /* We are holding virtio_fs_mutex. There should not be any * waiters waiting for completion. 
*/ reinit_completion(&fsvq->in_flight_zero); spin_unlock(&fsvq->lock); wait_for_completion(&fsvq->in_flight_zero); } else { spin_unlock(&fsvq->lock); } flush_work(&fsvq->done_work); flush_work(&fsvq->dispatch_work); } static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs) { struct virtio_fs_vq *fsvq; int i; for (i = 0; i < fs->nvqs; i++) { fsvq = &fs->vqs[i]; virtio_fs_drain_queue(fsvq); } } static void virtio_fs_drain_all_queues(struct virtio_fs *fs) { /* Provides mutual exclusion between ->remove and ->kill_sb * paths. We don't want both of these draining queue at the * same time. Current completion logic reinits completion * and that means there should not be any other thread * doing reinit or waiting for completion already. */ mutex_lock(&virtio_fs_mutex); virtio_fs_drain_all_queues_locked(fs); mutex_unlock(&virtio_fs_mutex); } static void virtio_fs_start_all_queues(struct virtio_fs *fs) { struct virtio_fs_vq *fsvq; int i; for (i = 0; i < fs->nvqs; i++) { fsvq = &fs->vqs[i]; spin_lock(&fsvq->lock); fsvq->connected = true; spin_unlock(&fsvq->lock); } } static void virtio_fs_delete_queues_sysfs(struct virtio_fs *fs) { struct virtio_fs_vq *fsvq; int i; for (i = 0; i < fs->nvqs; i++) { fsvq = &fs->vqs[i]; kobject_put(fsvq->kobj); } } static int virtio_fs_add_queues_sysfs(struct virtio_fs *fs) { struct virtio_fs_vq *fsvq; char buff[12]; int i, j, ret; for (i = 0; i < fs->nvqs; i++) { fsvq = &fs->vqs[i]; sprintf(buff, "%d", i); fsvq->kobj = kobject_create_and_add(buff, fs->mqs_kobj); if (!fs->mqs_kobj) { ret = -ENOMEM; goto out_del; } ret = sysfs_create_group(fsvq->kobj, &virtio_fs_vq_attr_group); if (ret) { kobject_put(fsvq->kobj); goto out_del; } } return 0; out_del: for (j = 0; j < i; j++) { fsvq = &fs->vqs[j]; kobject_put(fsvq->kobj); } return ret; } /* Add a new instance to the list or return -EEXIST if tag name exists*/ static int virtio_fs_add_instance(struct virtio_device *vdev, struct virtio_fs *fs) { struct virtio_fs *fs2; int ret; mutex_lock(&virtio_fs_mutex); list_for_each_entry(fs2, &virtio_fs_instances, list) { if (strcmp(fs->tag, fs2->tag) == 0) { mutex_unlock(&virtio_fs_mutex); return -EEXIST; } } /* Use the virtio_device's index as a unique identifier, there is no * need to allocate our own identifiers because the virtio_fs instance * is only visible to userspace as long as the underlying virtio_device * exists. 
*/ fs->kobj.kset = virtio_fs_kset; ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index); if (ret < 0) goto out_unlock; fs->mqs_kobj = kobject_create_and_add("mqs", &fs->kobj); if (!fs->mqs_kobj) { ret = -ENOMEM; goto out_del; } ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device"); if (ret < 0) goto out_put; ret = virtio_fs_add_queues_sysfs(fs); if (ret) goto out_remove; list_add_tail(&fs->list, &virtio_fs_instances); mutex_unlock(&virtio_fs_mutex); kobject_uevent(&fs->kobj, KOBJ_ADD); return 0; out_remove: sysfs_remove_link(&fs->kobj, "device"); out_put: kobject_put(fs->mqs_kobj); out_del: kobject_del(&fs->kobj); out_unlock: mutex_unlock(&virtio_fs_mutex); return ret; } /* Return the virtio_fs with a given tag, or NULL */ static struct virtio_fs *virtio_fs_find_instance(const char *tag) { struct virtio_fs *fs; mutex_lock(&virtio_fs_mutex); list_for_each_entry(fs, &virtio_fs_instances, list) { if (strcmp(fs->tag, tag) == 0) { kobject_get(&fs->kobj); goto found; } } fs = NULL; /* not found */ found: mutex_unlock(&virtio_fs_mutex); return fs; } static void virtio_fs_free_devs(struct virtio_fs *fs) { unsigned int i; for (i = 0; i < fs->nvqs; i++) { struct virtio_fs_vq *fsvq = &fs->vqs[i]; if (!fsvq->fud) continue; fuse_dev_free(fsvq->fud); fsvq->fud = NULL; } } /* Read filesystem name from virtio config into fs->tag (must kfree()). */ static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) { char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; char *end; size_t len; virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag), &tag_buf, sizeof(tag_buf)); end = memchr(tag_buf, '\0', sizeof(tag_buf)); if (end == tag_buf) return -EINVAL; /* empty tag */ if (!end) end = &tag_buf[sizeof(tag_buf)]; len = end - tag_buf; fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL); if (!fs->tag) return -ENOMEM; memcpy(fs->tag, tag_buf, len); fs->tag[len] = '\0'; /* While the VIRTIO specification allows any character, newlines are * awkward on mount(8) command-lines and cause problems in the sysfs * "tag" attr and uevent TAG= properties. Forbid them. 
*/ if (strchr(fs->tag, '\n')) { dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n"); return -EINVAL; } dev_info(&vdev->dev, "discovered new tag: %s\n", fs->tag); return 0; } /* Work function for hiprio completion */ static void virtio_fs_hiprio_done_work(struct work_struct *work) { struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, done_work); struct virtqueue *vq = fsvq->vq; /* Free completed FUSE_FORGET requests */ spin_lock(&fsvq->lock); do { unsigned int len; void *req; virtqueue_disable_cb(vq); while ((req = virtqueue_get_buf(vq, &len)) != NULL) { kfree(req); dec_in_flight_req(fsvq); } } while (!virtqueue_enable_cb(vq)); if (!list_empty(&fsvq->queued_reqs)) schedule_work(&fsvq->dispatch_work); spin_unlock(&fsvq->lock); } static void virtio_fs_request_dispatch_work(struct work_struct *work) { struct fuse_req *req; struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, dispatch_work); int ret; pr_debug("virtio-fs: worker %s called.\n", __func__); while (1) { spin_lock(&fsvq->lock); req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req, list); if (!req) { spin_unlock(&fsvq->lock); break; } list_del_init(&req->list); spin_unlock(&fsvq->lock); fuse_request_end(req); } /* Dispatch pending requests */ while (1) { unsigned int flags; spin_lock(&fsvq->lock); req = list_first_entry_or_null(&fsvq->queued_reqs, struct fuse_req, list); if (!req) { spin_unlock(&fsvq->lock); return; } list_del_init(&req->list); spin_unlock(&fsvq->lock); flags = memalloc_nofs_save(); ret = virtio_fs_enqueue_req(fsvq, req, true, GFP_KERNEL); memalloc_nofs_restore(flags); if (ret < 0) { if (ret == -ENOSPC) { spin_lock(&fsvq->lock); list_add_tail(&req->list, &fsvq->queued_reqs); spin_unlock(&fsvq->lock); return; } req->out.h.error = ret; spin_lock(&fsvq->lock); dec_in_flight_req(fsvq); spin_unlock(&fsvq->lock); pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); fuse_request_end(req); } } } /* * Returns 1 if queue is full and sender should wait a bit before sending * next request, 0 otherwise. */ static int send_forget_request(struct virtio_fs_vq *fsvq, struct virtio_fs_forget *forget, bool in_flight) { struct scatterlist sg; struct virtqueue *vq; int ret = 0; bool notify; struct virtio_fs_forget_req *req = &forget->req; spin_lock(&fsvq->lock); if (!fsvq->connected) { if (in_flight) dec_in_flight_req(fsvq); kfree(forget); goto out; } sg_init_one(&sg, req, sizeof(*req)); vq = fsvq->vq; dev_dbg(&vq->vdev->dev, "%s\n", __func__); ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC); if (ret < 0) { if (ret == -ENOSPC) { pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n", ret); list_add_tail(&forget->list, &fsvq->queued_reqs); if (!in_flight) inc_in_flight_req(fsvq); /* Queue is full */ ret = 1; } else { pr_debug("virtio-fs: Could not queue FORGET: err=%d. 
Dropping it.\n", ret); kfree(forget); if (in_flight) dec_in_flight_req(fsvq); } goto out; } if (!in_flight) inc_in_flight_req(fsvq); notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); if (notify) virtqueue_notify(vq); return ret; out: spin_unlock(&fsvq->lock); return ret; } static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) { struct virtio_fs_forget *forget; struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, dispatch_work); pr_debug("virtio-fs: worker %s called.\n", __func__); while (1) { spin_lock(&fsvq->lock); forget = list_first_entry_or_null(&fsvq->queued_reqs, struct virtio_fs_forget, list); if (!forget) { spin_unlock(&fsvq->lock); return; } list_del(&forget->list); spin_unlock(&fsvq->lock); if (send_forget_request(fsvq, forget, true)) return; } } /* Allocate and copy args into req->argbuf */ static int copy_args_to_argbuf(struct fuse_req *req, gfp_t gfp) { struct fuse_args *args = req->args; unsigned int offset = 0; unsigned int num_in; unsigned int num_out; unsigned int len; unsigned int i; num_in = args->in_numargs - args->in_pages; num_out = args->out_numargs - args->out_pages; len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) + fuse_len_args(num_out, args->out_args); req->argbuf = kmalloc(len, gfp); if (!req->argbuf) return -ENOMEM; for (i = 0; i < num_in; i++) { memcpy(req->argbuf + offset, args->in_args[i].value, args->in_args[i].size); offset += args->in_args[i].size; } return 0; } /* Copy args out of and free req->argbuf */ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) { unsigned int remaining; unsigned int offset; unsigned int num_in; unsigned int num_out; unsigned int i; remaining = req->out.h.len - sizeof(req->out.h); num_in = args->in_numargs - args->in_pages; num_out = args->out_numargs - args->out_pages; offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args); for (i = 0; i < num_out; i++) { unsigned int argsize = args->out_args[i].size; if (args->out_argvar && i == args->out_numargs - 1 && argsize > remaining) { argsize = remaining; } memcpy(args->out_args[i].value, req->argbuf + offset, argsize); offset += argsize; if (i != args->out_numargs - 1) remaining -= argsize; } /* Store the actual size of the variable-length arg */ if (args->out_argvar) args->out_args[args->out_numargs - 1].size = remaining; kfree(req->argbuf); req->argbuf = NULL; } /* Work function for request completion */ static void virtio_fs_request_complete(struct fuse_req *req, struct virtio_fs_vq *fsvq) { struct fuse_pqueue *fpq = &fsvq->fud->pq; struct fuse_args *args; struct fuse_args_pages *ap; unsigned int len, i, thislen; struct folio *folio; /* * TODO verify that server properly follows FUSE protocol * (oh.uniq, oh.len) */ args = req->args; copy_args_from_argbuf(args, req); if (args->out_pages && args->page_zeroing) { len = args->out_args[args->out_numargs - 1].size; ap = container_of(args, typeof(*ap), args); for (i = 0; i < ap->num_folios; i++) { thislen = ap->descs[i].length; if (len < thislen) { WARN_ON(ap->descs[i].offset); folio = ap->folios[i]; folio_zero_segment(folio, len, thislen); len = 0; } else { len -= thislen; } } } spin_lock(&fpq->lock); clear_bit(FR_SENT, &req->flags); spin_unlock(&fpq->lock); fuse_request_end(req); spin_lock(&fsvq->lock); dec_in_flight_req(fsvq); spin_unlock(&fsvq->lock); } static void virtio_fs_complete_req_work(struct work_struct *work) { struct virtio_fs_req_work *w = container_of(work, typeof(*w), done_work); virtio_fs_request_complete(w->req, w->fsvq); 
kfree(w); } static void virtio_fs_requests_done_work(struct work_struct *work) { struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, done_work); struct fuse_pqueue *fpq = &fsvq->fud->pq; struct virtqueue *vq = fsvq->vq; struct fuse_req *req; struct fuse_req *next; unsigned int len; LIST_HEAD(reqs); /* Collect completed requests off the virtqueue */ spin_lock(&fsvq->lock); do { virtqueue_disable_cb(vq); while ((req = virtqueue_get_buf(vq, &len)) != NULL) { spin_lock(&fpq->lock); list_move_tail(&req->list, &reqs); spin_unlock(&fpq->lock); } } while (!virtqueue_enable_cb(vq)); spin_unlock(&fsvq->lock); /* End requests */ list_for_each_entry_safe(req, next, &reqs, list) { list_del_init(&req->list); /* blocking async request completes in a worker context */ if (req->args->may_block) { struct virtio_fs_req_work *w; w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL); INIT_WORK(&w->done_work, virtio_fs_complete_req_work); w->fsvq = fsvq; w->req = req; schedule_work(&w->done_work); } else { virtio_fs_request_complete(req, fsvq); } } /* Try to push previously queued requests, as the queue might no longer be full */ spin_lock(&fsvq->lock); if (!list_empty(&fsvq->queued_reqs)) schedule_work(&fsvq->dispatch_work); spin_unlock(&fsvq->lock); } static void virtio_fs_map_queues(struct virtio_device *vdev, struct virtio_fs *fs) { const struct cpumask *mask, *masks; unsigned int q, cpu; /* First attempt to map using existing transport layer affinities * e.g. PCIe MSI-X */ if (!vdev->config->get_vq_affinity) goto fallback; for (q = 0; q < fs->num_request_queues; q++) { mask = vdev->config->get_vq_affinity(vdev, VQ_REQUEST + q); if (!mask) goto fallback; for_each_cpu(cpu, mask) fs->mq_map[cpu] = q + VQ_REQUEST; } return; fallback: /* Attempt to map evenly in groups over the CPUs */ masks = group_cpus_evenly(fs->num_request_queues); /* If even this fails we default to all CPUs use first request queue */ if (!masks) { for_each_possible_cpu(cpu) fs->mq_map[cpu] = VQ_REQUEST; return; } for (q = 0; q < fs->num_request_queues; q++) { for_each_cpu(cpu, &masks[q]) fs->mq_map[cpu] = q + VQ_REQUEST; } kfree(masks); } /* Virtqueue interrupt handler */ static void virtio_fs_vq_done(struct virtqueue *vq) { struct virtio_fs_vq *fsvq = vq_to_fsvq(vq); dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name); schedule_work(&fsvq->done_work); } static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name, int vq_type) { strscpy(fsvq->name, name, VQ_NAME_LEN); spin_lock_init(&fsvq->lock); INIT_LIST_HEAD(&fsvq->queued_reqs); INIT_LIST_HEAD(&fsvq->end_reqs); init_completion(&fsvq->in_flight_zero); if (vq_type == VQ_REQUEST) { INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work); INIT_WORK(&fsvq->dispatch_work, virtio_fs_request_dispatch_work); } else { INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work); INIT_WORK(&fsvq->dispatch_work, virtio_fs_hiprio_dispatch_work); } } /* Initialize virtqueues */ static int virtio_fs_setup_vqs(struct virtio_device *vdev, struct virtio_fs *fs) { struct virtqueue_info *vqs_info; struct virtqueue **vqs; /* Specify pre_vectors to ensure that the queues before the * request queues (e.g. 
hiprio) don't claim any of the CPUs in * the multi-queue mapping and interrupt affinities */ struct irq_affinity desc = { .pre_vectors = VQ_REQUEST }; unsigned int i; int ret = 0; virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues, &fs->num_request_queues); if (fs->num_request_queues == 0) return -EINVAL; /* Truncate nr of request queues to nr_cpu_id */ fs->num_request_queues = min_t(unsigned int, fs->num_request_queues, nr_cpu_ids); fs->nvqs = VQ_REQUEST + fs->num_request_queues; fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); if (!fs->vqs) return -ENOMEM; vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL); fs->mq_map = kcalloc_node(nr_cpu_ids, sizeof(*fs->mq_map), GFP_KERNEL, dev_to_node(&vdev->dev)); vqs_info = kcalloc(fs->nvqs, sizeof(*vqs_info), GFP_KERNEL); if (!vqs || !vqs_info || !fs->mq_map) { ret = -ENOMEM; goto out; } /* Initialize the hiprio/forget request virtqueue */ vqs_info[VQ_HIPRIO].callback = virtio_fs_vq_done; virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO); vqs_info[VQ_HIPRIO].name = fs->vqs[VQ_HIPRIO].name; /* Initialize the requests virtqueues */ for (i = VQ_REQUEST; i < fs->nvqs; i++) { char vq_name[VQ_NAME_LEN]; snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST); virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST); vqs_info[i].callback = virtio_fs_vq_done; vqs_info[i].name = fs->vqs[i].name; } ret = virtio_find_vqs(vdev, fs->nvqs, vqs, vqs_info, &desc); if (ret < 0) goto out; for (i = 0; i < fs->nvqs; i++) fs->vqs[i].vq = vqs[i]; virtio_fs_start_all_queues(fs); out: kfree(vqs_info); kfree(vqs); if (ret) { kfree(fs->vqs); kfree(fs->mq_map); } return ret; } /* Free virtqueues (device must already be reset) */ static void virtio_fs_cleanup_vqs(struct virtio_device *vdev) { vdev->config->del_vqs(vdev); } /* Map a window offset to a page frame number. The window offset will have * been produced by .iomap_begin(), which maps a file offset to a window * offset. */ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, enum dax_access_mode mode, void **kaddr, pfn_t *pfn) { struct virtio_fs *fs = dax_get_private(dax_dev); phys_addr_t offset = PFN_PHYS(pgoff); size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff; if (kaddr) *kaddr = fs->window_kaddr + offset; if (pfn) *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, 0); return nr_pages > max_nr_pages ? max_nr_pages : nr_pages; } static int virtio_fs_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, size_t nr_pages) { long rc; void *kaddr; rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr, NULL); if (rc < 0) return dax_mem2blk_err(rc); memset(kaddr, 0, nr_pages << PAGE_SHIFT); dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); return 0; } static const struct dax_operations virtio_fs_dax_ops = { .direct_access = virtio_fs_direct_access, .zero_page_range = virtio_fs_zero_page_range, }; static void virtio_fs_cleanup_dax(void *data) { struct dax_device *dax_dev = data; kill_dax(dax_dev); put_dax(dax_dev); } DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T)) static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) { struct dax_device *dax_dev __free(cleanup_dax) = NULL; struct virtio_shm_region cache_reg; struct dev_pagemap *pgmap; bool have_cache; if (!IS_ENABLED(CONFIG_FUSE_DAX)) return 0; dax_dev = alloc_dax(fs, &virtio_fs_dax_ops); if (IS_ERR(dax_dev)) { int rc = PTR_ERR(dax_dev); return rc == -EOPNOTSUPP ? 
0 : rc; } /* Get cache region */ have_cache = virtio_get_shm_region(vdev, &cache_reg, (u8)VIRTIO_FS_SHMCAP_ID_CACHE); if (!have_cache) { dev_notice(&vdev->dev, "%s: No cache capability\n", __func__); return 0; } if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len, dev_name(&vdev->dev))) { dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n", cache_reg.addr, cache_reg.len); return -EBUSY; } dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len, cache_reg.addr); pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL); if (!pgmap) return -ENOMEM; pgmap->type = MEMORY_DEVICE_FS_DAX; /* Ideally we would directly use the PCI BAR resource but * devm_memremap_pages() wants its own copy in pgmap. So * initialize a struct resource from scratch (only the start * and end fields will be used). */ pgmap->range = (struct range) { .start = (phys_addr_t) cache_reg.addr, .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1, }; pgmap->nr_range = 1; fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); if (IS_ERR(fs->window_kaddr)) return PTR_ERR(fs->window_kaddr); fs->window_phys_addr = (phys_addr_t) cache_reg.addr; fs->window_len = (phys_addr_t) cache_reg.len; dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); fs->dax_dev = no_free_ptr(dax_dev); return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, fs->dax_dev); } static int virtio_fs_probe(struct virtio_device *vdev) { struct virtio_fs *fs; int ret; fs = kzalloc(sizeof(*fs), GFP_KERNEL); if (!fs) return -ENOMEM; kobject_init(&fs->kobj, &virtio_fs_ktype); vdev->priv = fs; ret = virtio_fs_read_tag(vdev, fs); if (ret < 0) goto out; ret = virtio_fs_setup_vqs(vdev, fs); if (ret < 0) goto out; virtio_fs_map_queues(vdev, fs); ret = virtio_fs_setup_dax(vdev, fs); if (ret < 0) goto out_vqs; /* Bring the device online in case the filesystem is mounted and * requests need to be sent before we return. */ virtio_device_ready(vdev); ret = virtio_fs_add_instance(vdev, fs); if (ret < 0) goto out_vqs; return 0; out_vqs: virtio_reset_device(vdev); virtio_fs_cleanup_vqs(vdev); out: vdev->priv = NULL; kobject_put(&fs->kobj); return ret; } static void virtio_fs_stop_all_queues(struct virtio_fs *fs) { struct virtio_fs_vq *fsvq; int i; for (i = 0; i < fs->nvqs; i++) { fsvq = &fs->vqs[i]; spin_lock(&fsvq->lock); fsvq->connected = false; spin_unlock(&fsvq->lock); } } static void virtio_fs_remove(struct virtio_device *vdev) { struct virtio_fs *fs = vdev->priv; mutex_lock(&virtio_fs_mutex); /* This device is going away. 
No one should get new reference */ list_del_init(&fs->list); virtio_fs_delete_queues_sysfs(fs); sysfs_remove_link(&fs->kobj, "device"); kobject_put(fs->mqs_kobj); kobject_del(&fs->kobj); virtio_fs_stop_all_queues(fs); virtio_fs_drain_all_queues_locked(fs); virtio_reset_device(vdev); virtio_fs_cleanup_vqs(vdev); vdev->priv = NULL; /* Put device reference on virtio_fs object */ virtio_fs_put_locked(fs); mutex_unlock(&virtio_fs_mutex); } #ifdef CONFIG_PM_SLEEP static int virtio_fs_freeze(struct virtio_device *vdev) { /* TODO need to save state here */ pr_warn("virtio-fs: suspend/resume not yet supported\n"); return -EOPNOTSUPP; } static int virtio_fs_restore(struct virtio_device *vdev) { /* TODO need to restore state here */ return 0; } #endif /* CONFIG_PM_SLEEP */ static const struct virtio_device_id id_table[] = { { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, {}, }; static const unsigned int feature_table[] = {}; static struct virtio_driver virtio_fs_driver = { .driver.name = KBUILD_MODNAME, .id_table = id_table, .feature_table = feature_table, .feature_table_size = ARRAY_SIZE(feature_table), .probe = virtio_fs_probe, .remove = virtio_fs_remove, #ifdef CONFIG_PM_SLEEP .freeze = virtio_fs_freeze, .restore = virtio_fs_restore, #endif }; static void virtio_fs_send_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *link) { struct virtio_fs_forget *forget; struct virtio_fs_forget_req *req; struct virtio_fs *fs = fiq->priv; struct virtio_fs_vq *fsvq = &fs->vqs[VQ_HIPRIO]; u64 unique = fuse_get_unique(fiq); /* Allocate a buffer for the request */ forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL); req = &forget->req; req->ih = (struct fuse_in_header){ .opcode = FUSE_FORGET, .nodeid = link->forget_one.nodeid, .unique = unique, .len = sizeof(*req), }; req->arg = (struct fuse_forget_in){ .nlookup = link->forget_one.nlookup, }; send_forget_request(fsvq, forget, false); kfree(link); } static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) { /* * TODO interrupts. * * Normal fs operations on a local filesystems aren't interruptible. * Exceptions are blocking lock operations; for example fcntl(F_SETLKW) * with shared lock between host and guest. 
*/ } /* Count number of scatter-gather elements required */ static unsigned int sg_count_fuse_folios(struct fuse_folio_desc *folio_descs, unsigned int num_folios, unsigned int total_len) { unsigned int i; unsigned int this_len; for (i = 0; i < num_folios && total_len; i++) { this_len = min(folio_descs[i].length, total_len); total_len -= this_len; } return i; } /* Return the number of scatter-gather list elements required */ static unsigned int sg_count_fuse_req(struct fuse_req *req) { struct fuse_args *args = req->args; struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); unsigned int size, total_sgs = 1 /* fuse_in_header */; if (args->in_numargs - args->in_pages) total_sgs += 1; if (args->in_pages) { size = args->in_args[args->in_numargs - 1].size; total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios, size); } if (!test_bit(FR_ISREPLY, &req->flags)) return total_sgs; total_sgs += 1 /* fuse_out_header */; if (args->out_numargs - args->out_pages) total_sgs += 1; if (args->out_pages) { size = args->out_args[args->out_numargs - 1].size; total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios, size); } return total_sgs; } /* Add folios to scatter-gather list and return number of elements used */ static unsigned int sg_init_fuse_folios(struct scatterlist *sg, struct folio **folios, struct fuse_folio_desc *folio_descs, unsigned int num_folios, unsigned int total_len) { unsigned int i; unsigned int this_len; for (i = 0; i < num_folios && total_len; i++) { sg_init_table(&sg[i], 1); this_len = min(folio_descs[i].length, total_len); sg_set_folio(&sg[i], folios[i], this_len, folio_descs[i].offset); total_len -= this_len; } return i; } /* Add args to scatter-gather list and return number of elements used */ static unsigned int sg_init_fuse_args(struct scatterlist *sg, struct fuse_req *req, struct fuse_arg *args, unsigned int numargs, bool argpages, void *argbuf, unsigned int *len_used) { struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); unsigned int total_sgs = 0; unsigned int len; len = fuse_len_args(numargs - argpages, args); if (len) sg_init_one(&sg[total_sgs++], argbuf, len); if (argpages) total_sgs += sg_init_fuse_folios(&sg[total_sgs], ap->folios, ap->descs, ap->num_folios, args[numargs - 1].size); if (len_used) *len_used = len; return total_sgs; } /* Add a request to a virtqueue and kick the device */ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, struct fuse_req *req, bool in_flight, gfp_t gfp) { /* requests need at least 4 elements */ struct scatterlist *stack_sgs[6]; struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; struct scatterlist **sgs = stack_sgs; struct scatterlist *sg = stack_sg; struct virtqueue *vq; struct fuse_args *args = req->args; unsigned int argbuf_used = 0; unsigned int out_sgs = 0; unsigned int in_sgs = 0; unsigned int total_sgs; unsigned int i; int ret; bool notify; struct fuse_pqueue *fpq; /* Does the sglist fit on the stack? 
*/ total_sgs = sg_count_fuse_req(req); if (total_sgs > ARRAY_SIZE(stack_sgs)) { sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), gfp); sg = kmalloc_array(total_sgs, sizeof(sg[0]), gfp); if (!sgs || !sg) { ret = -ENOMEM; goto out; } } /* Use a bounce buffer since stack args cannot be mapped */ ret = copy_args_to_argbuf(req, gfp); if (ret < 0) goto out; /* Request elements */ sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h)); out_sgs += sg_init_fuse_args(&sg[out_sgs], req, (struct fuse_arg *)args->in_args, args->in_numargs, args->in_pages, req->argbuf, &argbuf_used); /* Reply elements */ if (test_bit(FR_ISREPLY, &req->flags)) { sg_init_one(&sg[out_sgs + in_sgs++], &req->out.h, sizeof(req->out.h)); in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req, args->out_args, args->out_numargs, args->out_pages, req->argbuf + argbuf_used, NULL); } WARN_ON(out_sgs + in_sgs != total_sgs); for (i = 0; i < total_sgs; i++) sgs[i] = &sg[i]; spin_lock(&fsvq->lock); if (!fsvq->connected) { spin_unlock(&fsvq->lock); ret = -ENOTCONN; goto out; } vq = fsvq->vq; ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); if (ret < 0) { spin_unlock(&fsvq->lock); goto out; } /* Request successfully sent. */ fpq = &fsvq->fud->pq; spin_lock(&fpq->lock); list_add_tail(&req->list, fpq->processing); spin_unlock(&fpq->lock); set_bit(FR_SENT, &req->flags); /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); if (!in_flight) inc_in_flight_req(fsvq); notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); if (notify) virtqueue_notify(vq); out: if (ret < 0 && req->argbuf) { kfree(req->argbuf); req->argbuf = NULL; } if (sgs != stack_sgs) { kfree(sgs); kfree(sg); } return ret; } static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req) { unsigned int queue_id; struct virtio_fs *fs; struct virtio_fs_vq *fsvq; int ret; if (req->in.h.opcode != FUSE_NOTIFY_REPLY) req->in.h.unique = fuse_get_unique(fiq); clear_bit(FR_PENDING, &req->flags); fs = fiq->priv; queue_id = fs->mq_map[raw_smp_processor_id()]; pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u queue_id %u\n", __func__, req->in.h.opcode, req->in.h.unique, req->in.h.nodeid, req->in.h.len, fuse_len_args(req->args->out_numargs, req->args->out_args), queue_id); fsvq = &fs->vqs[queue_id]; ret = virtio_fs_enqueue_req(fsvq, req, false, GFP_ATOMIC); if (ret < 0) { if (ret == -ENOSPC) { /* * Virtqueue full. Retry submission from worker * context as we might be holding fc->bg_lock. */ spin_lock(&fsvq->lock); list_add_tail(&req->list, &fsvq->queued_reqs); inc_in_flight_req(fsvq); spin_unlock(&fsvq->lock); return; } req->out.h.error = ret; pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); /* Can't end request in submission context. 
Use a worker */ spin_lock(&fsvq->lock); list_add_tail(&req->list, &fsvq->end_reqs); schedule_work(&fsvq->dispatch_work); spin_unlock(&fsvq->lock); return; } } static const struct fuse_iqueue_ops virtio_fs_fiq_ops = { .send_forget = virtio_fs_send_forget, .send_interrupt = virtio_fs_send_interrupt, .send_req = virtio_fs_send_req, .release = virtio_fs_fiq_release, }; static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx) { ctx->rootmode = S_IFDIR; ctx->default_permissions = 1; ctx->allow_other = 1; ctx->max_read = UINT_MAX; ctx->blksize = 512; ctx->destroy = true; ctx->no_control = true; ctx->no_force_umount = true; } static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) { struct fuse_mount *fm = get_fuse_mount_super(sb); struct fuse_conn *fc = fm->fc; struct virtio_fs *fs = fc->iq.priv; struct fuse_fs_context *ctx = fsc->fs_private; unsigned int i; int err; virtio_fs_ctx_set_defaults(ctx); mutex_lock(&virtio_fs_mutex); /* After holding mutex, make sure virtiofs device is still there. * Though we are holding a reference to it, drive ->remove might * still have cleaned up virtual queues. In that case bail out. */ err = -EINVAL; if (list_empty(&fs->list)) { pr_info("virtio-fs: tag <%s> not found\n", fs->tag); goto err; } err = -ENOMEM; /* Allocate fuse_dev for hiprio and notification queues */ for (i = 0; i < fs->nvqs; i++) { struct virtio_fs_vq *fsvq = &fs->vqs[i]; fsvq->fud = fuse_dev_alloc(); if (!fsvq->fud) goto err_free_fuse_devs; } /* virtiofs allocates and installs its own fuse devices */ ctx->fudptr = NULL; if (ctx->dax_mode != FUSE_DAX_NEVER) { if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) { err = -EINVAL; pr_err("virtio-fs: dax can't be enabled as filesystem" " device does not support it.\n"); goto err_free_fuse_devs; } ctx->dax_dev = fs->dax_dev; } err = fuse_fill_super_common(sb, ctx); if (err < 0) goto err_free_fuse_devs; for (i = 0; i < fs->nvqs; i++) { struct virtio_fs_vq *fsvq = &fs->vqs[i]; fuse_dev_install(fsvq->fud, fc); } /* Previous unmount will stop all queues. Start these again */ virtio_fs_start_all_queues(fs); fuse_send_init(fm); mutex_unlock(&virtio_fs_mutex); return 0; err_free_fuse_devs: virtio_fs_free_devs(fs); err: mutex_unlock(&virtio_fs_mutex); return err; } static void virtio_fs_conn_destroy(struct fuse_mount *fm) { struct fuse_conn *fc = fm->fc; struct virtio_fs *vfs = fc->iq.priv; struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO]; /* Stop dax worker. Soon evict_inodes() will be called which * will free all memory ranges belonging to all inodes. */ if (IS_ENABLED(CONFIG_FUSE_DAX)) fuse_dax_cancel_work(fc); /* Stop forget queue. Soon destroy will be sent */ spin_lock(&fsvq->lock); fsvq->connected = false; spin_unlock(&fsvq->lock); virtio_fs_drain_all_queues(vfs); fuse_conn_destroy(fm); /* fuse_conn_destroy() must have sent destroy. Stop all queues * and drain one more time and free fuse devices. Freeing fuse * devices will drop their reference on fuse_conn and that in * turn will drop its reference on virtio_fs object. 
*/ virtio_fs_stop_all_queues(vfs); virtio_fs_drain_all_queues(vfs); virtio_fs_free_devs(vfs); } static void virtio_kill_sb(struct super_block *sb) { struct fuse_mount *fm = get_fuse_mount_super(sb); bool last; /* If mount failed, we can still be called without any fc */ if (sb->s_root) { last = fuse_mount_remove(fm); if (last) virtio_fs_conn_destroy(fm); } kill_anon_super(sb); fuse_mount_destroy(fm); } static int virtio_fs_test_super(struct super_block *sb, struct fs_context *fsc) { struct fuse_mount *fsc_fm = fsc->s_fs_info; struct fuse_mount *sb_fm = get_fuse_mount_super(sb); return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv; } static int virtio_fs_get_tree(struct fs_context *fsc) { struct virtio_fs *fs; struct super_block *sb; struct fuse_conn *fc = NULL; struct fuse_mount *fm; unsigned int virtqueue_size; int err = -EIO; if (!fsc->source) return invalf(fsc, "No source specified"); /* This gets a reference on virtio_fs object. This ptr gets installed * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() * to drop the reference to this object. */ fs = virtio_fs_find_instance(fsc->source); if (!fs) { pr_info("virtio-fs: tag <%s> not found\n", fsc->source); return -EINVAL; } virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq); if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD)) goto out_err; err = -ENOMEM; fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); if (!fc) goto out_err; fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); if (!fm) goto out_err; fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs); fc->release = fuse_free_conn; fc->delete_stale = true; fc->auto_submounts = true; fc->sync_fs = true; fc->use_pages_for_kvec_io = true; /* Tell FUSE to split requests that exceed the virtqueue's size */ fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit, virtqueue_size - FUSE_HEADER_OVERHEAD); fsc->s_fs_info = fm; sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc); if (fsc->s_fs_info) fuse_mount_destroy(fm); if (IS_ERR(sb)) return PTR_ERR(sb); if (!sb->s_root) { err = virtio_fs_fill_super(sb, fsc); if (err) { deactivate_locked_super(sb); return err; } sb->s_flags |= SB_ACTIVE; } WARN_ON(fsc->root); fsc->root = dget(sb->s_root); return 0; out_err: kfree(fc); virtio_fs_put(fs); return err; } static const struct fs_context_operations virtio_fs_context_ops = { .free = virtio_fs_free_fsc, .parse_param = virtio_fs_parse_param, .get_tree = virtio_fs_get_tree, }; static int virtio_fs_init_fs_context(struct fs_context *fsc) { struct fuse_fs_context *ctx; if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT) return fuse_init_fs_context_submount(fsc); ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; fsc->fs_private = ctx; fsc->ops = &virtio_fs_context_ops; return 0; } static struct file_system_type virtio_fs_type = { .owner = THIS_MODULE, .name = "virtiofs", .init_fs_context = virtio_fs_init_fs_context, .kill_sb = virtio_kill_sb, .fs_flags = FS_ALLOW_IDMAP, }; static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj); add_uevent_var(env, "TAG=%s", fs->tag); return 0; } static const struct kset_uevent_ops virtio_fs_uevent_ops = { .uevent = virtio_fs_uevent, }; static int __init virtio_fs_sysfs_init(void) { virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops, fs_kobj); if (!virtio_fs_kset) return -ENOMEM; return 0; } static void virtio_fs_sysfs_exit(void) { kset_unregister(virtio_fs_kset); 
virtio_fs_kset = NULL; } static int __init virtio_fs_init(void) { int ret; ret = virtio_fs_sysfs_init(); if (ret < 0) return ret; ret = register_virtio_driver(&virtio_fs_driver); if (ret < 0) goto sysfs_exit; ret = register_filesystem(&virtio_fs_type); if (ret < 0) goto unregister_virtio_driver; return 0; unregister_virtio_driver: unregister_virtio_driver(&virtio_fs_driver); sysfs_exit: virtio_fs_sysfs_exit(); return ret; } module_init(virtio_fs_init); static void __exit virtio_fs_exit(void) { unregister_filesystem(&virtio_fs_type); unregister_virtio_driver(&virtio_fs_driver); virtio_fs_sysfs_exit(); } module_exit(virtio_fs_exit); MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>"); MODULE_DESCRIPTION("Virtio Filesystem"); MODULE_LICENSE("GPL"); MODULE_ALIAS_FS(KBUILD_MODNAME); MODULE_DEVICE_TABLE(virtio, id_table); |
2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 
2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 
3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 
4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 
4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 | // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * PACKET - implements raw packet sockets. * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> * * Fixes: * Alan Cox : verify_area() now used correctly * Alan Cox : new skbuff lists, look ma no backlogs! * Alan Cox : tidied skbuff lists. * Alan Cox : Now uses generic datagram routines I * added. Also fixed the peek/read crash * from all old Linux datagram code. * Alan Cox : Uses the improved datagram code. * Alan Cox : Added NULL's for socket options. * Alan Cox : Re-commented the code. * Alan Cox : Use new kernel side addressing * Rob Janssen : Correct MTU usage. * Dave Platt : Counter leaks caused by incorrect * interrupt locking and some slightly * dubious gcc output. Can you read * compiler: it said _VOLATILE_ * Richard Kooijman : Timestamp fixes. * Alan Cox : New buffers. Use sk->mac.raw. * Alan Cox : sendmsg/recvmsg support. * Alan Cox : Protocol setting support * Alexey Kuznetsov : Untied from IPv4 stack. * Cyrus Durgin : Fixed kerneld for kmod. * Michal Ostrowski : Module initialization cleanup. * Ulises Alonso : Frame number limit removal and * packet_set_ring memory leak. * Eric Biederman : Allow for > 8 byte hardware addresses. * The convention is that longer addresses * will simply extend the hardware address * byte arrays at the end of sockaddr_ll * and packet_mreq. * Johann Baudy : Added TX RING. * Chetan Loke : Implemented TPACKET_V3 block abstraction * layer. * Copyright (C) 2011, <lokec@ccs.neu.edu> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/ethtool.h> #include <linux/filter.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/capability.h> #include <linux/fcntl.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/if_packet.h> #include <linux/wireless.h> #include <linux/kernel.h> #include <linux/kmod.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/errno.h> #include <linux/timer.h> #include <linux/uaccess.h> #include <asm/ioctls.h> #include <asm/page.h> #include <asm/cacheflush.h> #include <asm/io.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/poll.h> #include <linux/module.h> #include <linux/init.h> #include <linux/mutex.h> #include <linux/if_vlan.h> #include <linux/virtio_net.h> #include <linux/errqueue.h> #include <linux/net_tstamp.h> #include <linux/percpu.h> #ifdef CONFIG_INET #include <net/inet_common.h> #endif #include <linux/bpf.h> #include <net/compat.h> #include <linux/netfilter_netdev.h> #include "internal.h" /* Assumptions: - If the device has no dev->header_ops->create, there is no LL header visible above the device. In this case, its hard_header_len should be 0. The device may prepend its own header internally. In this case, its needed_headroom should be set to the space needed for it to add its internal header. 
For example, a WiFi driver pretending to be an Ethernet driver should set its hard_header_len to be the Ethernet header length, and set its needed_headroom to be (the real WiFi header length - the fake Ethernet header length). - packet socket receives packets with pulled ll header, so that SOCK_RAW should push it back. On receive: ----------- Incoming, dev_has_header(dev) == true mac_header -> ll header data -> data Outgoing, dev_has_header(dev) == true mac_header -> ll header data -> ll header Incoming, dev_has_header(dev) == false mac_header -> data However drivers often make it point to the ll header. This is incorrect because the ll header should be invisible to us. data -> data Outgoing, dev_has_header(dev) == false mac_header -> data. ll header is invisible to us. data -> data Resume If dev_has_header(dev) == false we are unable to restore the ll header, because it is invisible to us. On transmit: ------------ dev_has_header(dev) == true mac_header -> ll header data -> ll header dev_has_header(dev) == false (ll header is invisible to us) mac_header -> data data -> data We should set network_header on output to the correct position, packet classifier depends on it. */ /* Private packet socket structures. */ /* identical to struct packet_mreq except it has * a longer address field. */ struct packet_mreq_max { int mr_ifindex; unsigned short mr_type; unsigned short mr_alen; unsigned char mr_address[MAX_ADDR_LEN]; }; union tpacket_uhdr { struct tpacket_hdr *h1; struct tpacket2_hdr *h2; struct tpacket3_hdr *h3; void *raw; }; static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring); #define V3_ALIGNMENT (8) #define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) #define BLK_PLUS_PRIV(sz_of_priv) \ (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) #define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) #define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt) #define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len) #define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num) #define BLOCK_O2PRIV(x) ((x)->offset_to_priv) struct packet_sock; static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status); static void packet_increment_head(struct packet_ring_buffer *buff); static int prb_curr_blk_in_use(struct tpacket_block_desc *); static void *prb_dispatch_next_block(struct tpacket_kbdq_core *, struct packet_sock *); static void prb_retire_current_block(struct tpacket_kbdq_core *, struct packet_sock *, unsigned int status); static int prb_queue_frozen(struct tpacket_kbdq_core *); static void prb_open_block(struct tpacket_kbdq_core *, struct tpacket_block_desc *); static void prb_retire_rx_blk_timer_expired(struct timer_list *); static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *); static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void prb_clear_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void prb_fill_vlan_info(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); static u16 packet_pick_tx_queue(struct sk_buff *skb); struct packet_skb_cb { union { struct sockaddr_pkt pkt; union { /* Trick: alias skb original length with * ll.sll_family and ll.protocol in order * to save room. 
*/ unsigned int origlen; struct sockaddr_ll ll; }; } sa; }; #define vio_le() virtio_legacy_is_little_endian() #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) #define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc)) #define GET_PBLOCK_DESC(x, bid) \ ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer)) #define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \ ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer)) #define GET_NEXT_PRB_BLK_NUM(x) \ (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \ ((x)->kactive_blk_num+1) : 0) static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); #ifdef CONFIG_NETFILTER_EGRESS static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb) { struct sk_buff *next, *head = NULL, *tail; int rc; rcu_read_lock(); for (; skb != NULL; skb = next) { next = skb->next; skb_mark_not_on_list(skb); if (!nf_hook_egress(skb, &rc, skb->dev)) continue; if (!head) head = skb; else tail->next = skb; tail = skb; } rcu_read_unlock(); return head; } #endif static int packet_xmit(const struct packet_sock *po, struct sk_buff *skb) { if (!packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS)) return dev_queue_xmit(skb); #ifdef CONFIG_NETFILTER_EGRESS if (nf_hook_egress_active()) { skb = nf_hook_direct_egress(skb); if (!skb) return NET_XMIT_DROP; } #endif return dev_direct_xmit(skb, packet_pick_tx_queue(skb)); } static struct net_device *packet_cached_dev_get(struct packet_sock *po) { struct net_device *dev; rcu_read_lock(); dev = rcu_dereference(po->cached_dev); dev_hold(dev); rcu_read_unlock(); return dev; } static void packet_cached_dev_assign(struct packet_sock *po, struct net_device *dev) { rcu_assign_pointer(po->cached_dev, dev); } static void packet_cached_dev_reset(struct packet_sock *po) { RCU_INIT_POINTER(po->cached_dev, NULL); } static u16 packet_pick_tx_queue(struct sk_buff *skb) { struct net_device *dev = skb->dev; const struct net_device_ops *ops = dev->netdev_ops; int cpu = raw_smp_processor_id(); u16 queue_index; #ifdef CONFIG_XPS skb->sender_cpu = cpu + 1; #endif skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues); if (ops->ndo_select_queue) { queue_index = ops->ndo_select_queue(dev, skb, NULL); queue_index = netdev_cap_txqueue(dev, queue_index); } else { queue_index = netdev_pick_tx(dev, skb, NULL); } return queue_index; } /* __register_prot_hook must be invoked through register_prot_hook * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). */ static void __register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); if (!packet_sock_flag(po, PACKET_SOCK_RUNNING)) { if (po->fanout) __fanout_link(sk, po); else dev_add_pack(&po->prot_hook); sock_hold(sk); packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 1); } } static void register_prot_hook(struct sock *sk) { lockdep_assert_held_once(&pkt_sk(sk)->bind_lock); __register_prot_hook(sk); } /* If the sync parameter is true, we will temporarily drop * the po->bind_lock and do a synchronize_net to make sure no * asynchronous packet processing paths still refer to the elements * of po->prot_hook. If the sync parameter is false, it is the * callers responsibility to take care of this. 
*/ static void __unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); lockdep_assert_held_once(&po->bind_lock); packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 0); if (po->fanout) __fanout_unlink(sk, po); else __dev_remove_pack(&po->prot_hook); __sock_put(sk); if (sync) { spin_unlock(&po->bind_lock); synchronize_net(); spin_lock(&po->bind_lock); } } static void unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) __unregister_prot_hook(sk, sync); } static inline struct page * __pure pgv_to_page(void *addr) { if (is_vmalloc_addr(addr)) return vmalloc_to_page(addr); return virt_to_page(addr); } static void __packet_set_status(struct packet_sock *po, void *frame, int status) { union tpacket_uhdr h; /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */ h.raw = frame; switch (po->tp_version) { case TPACKET_V1: WRITE_ONCE(h.h1->tp_status, status); flush_dcache_page(pgv_to_page(&h.h1->tp_status)); break; case TPACKET_V2: WRITE_ONCE(h.h2->tp_status, status); flush_dcache_page(pgv_to_page(&h.h2->tp_status)); break; case TPACKET_V3: WRITE_ONCE(h.h3->tp_status, status); flush_dcache_page(pgv_to_page(&h.h3->tp_status)); break; default: WARN(1, "TPACKET version not supported.\n"); BUG(); } smp_wmb(); } static int __packet_get_status(const struct packet_sock *po, void *frame) { union tpacket_uhdr h; smp_rmb(); /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */ h.raw = frame; switch (po->tp_version) { case TPACKET_V1: flush_dcache_page(pgv_to_page(&h.h1->tp_status)); return READ_ONCE(h.h1->tp_status); case TPACKET_V2: flush_dcache_page(pgv_to_page(&h.h2->tp_status)); return READ_ONCE(h.h2->tp_status); case TPACKET_V3: flush_dcache_page(pgv_to_page(&h.h3->tp_status)); return READ_ONCE(h.h3->tp_status); default: WARN(1, "TPACKET version not supported.\n"); BUG(); return 0; } } static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts, unsigned int flags) { struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); if (shhwtstamps && (flags & SOF_TIMESTAMPING_RAW_HARDWARE) && ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts)) return TP_STATUS_TS_RAW_HARDWARE; if ((flags & SOF_TIMESTAMPING_SOFTWARE) && ktime_to_timespec64_cond(skb_tstamp(skb), ts)) return TP_STATUS_TS_SOFTWARE; return 0; } static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, struct sk_buff *skb) { union tpacket_uhdr h; struct timespec64 ts; __u32 ts_status; if (!(ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp)))) return 0; h.raw = frame; /* * versions 1 through 3 overflow the timestamps in y2106, since they * all store the seconds in a 32-bit unsigned integer. * If we create a version 4, that should have a 64-bit timestamp, * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit * nanoseconds. 
*/ switch (po->tp_version) { case TPACKET_V1: h.h1->tp_sec = ts.tv_sec; h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; break; case TPACKET_V2: h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; break; case TPACKET_V3: h.h3->tp_sec = ts.tv_sec; h.h3->tp_nsec = ts.tv_nsec; break; default: WARN(1, "TPACKET version not supported.\n"); BUG(); } /* one flush is safe, as both fields always lie on the same cacheline */ flush_dcache_page(pgv_to_page(&h.h1->tp_sec)); smp_wmb(); return ts_status; } static void *packet_lookup_frame(const struct packet_sock *po, const struct packet_ring_buffer *rb, unsigned int position, int status) { unsigned int pg_vec_pos, frame_offset; union tpacket_uhdr h; pg_vec_pos = position / rb->frames_per_block; frame_offset = position % rb->frames_per_block; h.raw = rb->pg_vec[pg_vec_pos].buffer + (frame_offset * rb->frame_size); if (status != __packet_get_status(po, h.raw)) return NULL; return h.raw; } static void *packet_current_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { return packet_lookup_frame(po, rb, rb->head, status); } static u16 vlan_get_tci(const struct sk_buff *skb, struct net_device *dev) { struct vlan_hdr vhdr, *vh; unsigned int header_len; if (!dev) return 0; /* In the SOCK_DGRAM scenario, skb data starts at the network * protocol, which is after the VLAN headers. The outer VLAN * header is at the hard_header_len offset in non-variable * length link layer headers. If it's a VLAN device, the * min_header_len should be used to exclude the VLAN header * size. */ if (dev->min_header_len == dev->hard_header_len) header_len = dev->hard_header_len; else if (is_vlan_dev(dev)) header_len = dev->min_header_len; else return 0; vh = skb_header_pointer(skb, skb_mac_offset(skb) + header_len, sizeof(vhdr), &vhdr); if (unlikely(!vh)) return 0; return ntohs(vh->h_vlan_TCI); } static __be16 vlan_get_protocol_dgram(const struct sk_buff *skb) { __be16 proto = skb->protocol; if (unlikely(eth_type_vlan(proto))) proto = __vlan_get_protocol_offset(skb, proto, skb_mac_offset(skb), NULL); return proto; } static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc) { timer_delete_sync(&pkc->retire_blk_timer); } static void prb_shutdown_retire_blk_timer(struct packet_sock *po, struct sk_buff_head *rb_queue) { struct tpacket_kbdq_core *pkc; pkc = GET_PBDQC_FROM_RB(&po->rx_ring); spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); } static void prb_setup_retire_blk_timer(struct packet_sock *po) { struct tpacket_kbdq_core *pkc; pkc = GET_PBDQC_FROM_RB(&po->rx_ring); timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired, 0); pkc->retire_blk_timer.expires = jiffies; } static int prb_calc_retire_blk_tmo(struct packet_sock *po, int blk_size_in_bytes) { struct net_device *dev; unsigned int mbits, div; struct ethtool_link_ksettings ecmd; int err; rtnl_lock(); dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); if (unlikely(!dev)) { rtnl_unlock(); return DEFAULT_PRB_RETIRE_TOV; } err = __ethtool_get_link_ksettings(dev, &ecmd); rtnl_unlock(); if (err) return DEFAULT_PRB_RETIRE_TOV; /* If the link speed is so slow you don't really * need to worry about perf anyways */ if (ecmd.base.speed < SPEED_1000 || ecmd.base.speed == SPEED_UNKNOWN) return DEFAULT_PRB_RETIRE_TOV; div = ecmd.base.speed / 1000; mbits = (blk_size_in_bytes * 8) / (1024 * 1024); if (div) mbits /= div; if (div) return mbits + 1; return mbits; } static void prb_init_ft_ops(struct tpacket_kbdq_core *p1, union 
tpacket_req_u *req_u) { p1->feature_req_word = req_u->req3.tp_feature_req_word; } static void init_prb_bdqc(struct packet_sock *po, struct packet_ring_buffer *rb, struct pgv *pg_vec, union tpacket_req_u *req_u) { struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb); struct tpacket_block_desc *pbd; memset(p1, 0x0, sizeof(*p1)); p1->knxt_seq_num = 1; p1->pkbdq = pg_vec; pbd = (struct tpacket_block_desc *)pg_vec[0].buffer; p1->pkblk_start = pg_vec[0].buffer; p1->kblk_size = req_u->req3.tp_block_size; p1->knum_blocks = req_u->req3.tp_block_nr; p1->hdrlen = po->tp_hdrlen; p1->version = po->tp_version; p1->last_kactive_blk_num = 0; po->stats.stats3.tp_freeze_q_cnt = 0; if (req_u->req3.tp_retire_blk_tov) p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov; else p1->retire_blk_tov = prb_calc_retire_blk_tmo(po, req_u->req3.tp_block_size); p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; rwlock_init(&p1->blk_fill_in_prog_lock); p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); prb_init_ft_ops(p1, req_u); prb_setup_retire_blk_timer(po); prb_open_block(p1, pbd); } /* Do NOT update the last_blk_num first. * Assumes sk_buff_head lock is held. */ static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) { mod_timer(&pkc->retire_blk_timer, jiffies + pkc->tov_in_jiffies); pkc->last_kactive_blk_num = pkc->kactive_blk_num; } /* * Timer logic: * 1) We refresh the timer only when we open a block. * By doing this we don't waste cycles refreshing the timer * on packet-by-packet basis. * * With a 1MB block-size, on a 1Gbps line, it will take * i) ~8 ms to fill a block + ii) memcpy etc. * In this cut we are not accounting for the memcpy time. * * So, if the user sets the 'tmo' to 10ms then the timer * will never fire while the block is still getting filled * (which is what we want). However, the user could choose * to close a block early and that's fine. * * But when the timer does fire, we check whether or not to refresh it. * Since the tmo granularity is in msecs, it is not too expensive * to refresh the timer, lets say every '8' msecs. * Either the user can set the 'tmo' or we can derive it based on * a) line-speed and b) block-size. * prb_calc_retire_blk_tmo() calculates the tmo. * */ static void prb_retire_rx_blk_timer_expired(struct timer_list *t) { struct packet_sock *po = timer_container_of(po, t, rx_ring.prb_bdqc.retire_blk_timer); struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring); unsigned int frozen; struct tpacket_block_desc *pbd; spin_lock(&po->sk.sk_receive_queue.lock); frozen = prb_queue_frozen(pkc); pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); if (unlikely(pkc->delete_blk_timer)) goto out; /* We only need to plug the race when the block is partially filled. * tpacket_rcv: * lock(); increment BLOCK_NUM_PKTS; unlock() * copy_bits() is in progress ... * timer fires on other cpu: * we can't retire the current block because copy_bits * is in progress. * */ if (BLOCK_NUM_PKTS(pbd)) { /* Waiting for skb_copy_bits to finish... */ write_lock(&pkc->blk_fill_in_prog_lock); write_unlock(&pkc->blk_fill_in_prog_lock); } if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { if (!frozen) { if (!BLOCK_NUM_PKTS(pbd)) { /* An empty block. Just refresh the timer. */ goto refresh_timer; } prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO); if (!prb_dispatch_next_block(pkc, po)) goto refresh_timer; else goto out; } else { /* Case 1. Queue was frozen because user-space was * lagging behind. 
*/ if (prb_curr_blk_in_use(pbd)) { /* * Ok, user-space is still behind. * So just refresh the timer. */ goto refresh_timer; } else { /* Case 2. queue was frozen,user-space caught up, * now the link went idle && the timer fired. * We don't have a block to close.So we open this * block and restart the timer. * opening a block thaws the queue,restarts timer * Thawing/timer-refresh is a side effect. */ prb_open_block(pkc, pbd); goto out; } } } refresh_timer: _prb_refresh_rx_retire_blk_timer(pkc); out: spin_unlock(&po->sk.sk_receive_queue.lock); } static void prb_flush_block(struct tpacket_kbdq_core *pkc1, struct tpacket_block_desc *pbd1, __u32 status) { /* Flush everything minus the block header */ #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 u8 *start, *end; start = (u8 *)pbd1; /* Skip the block header(we know header WILL fit in 4K) */ start += PAGE_SIZE; end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end); for (; start < end; start += PAGE_SIZE) flush_dcache_page(pgv_to_page(start)); smp_wmb(); #endif /* Now update the block status. */ BLOCK_STATUS(pbd1) = status; /* Flush the block header */ #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 start = (u8 *)pbd1; flush_dcache_page(pgv_to_page(start)); smp_wmb(); #endif } /* * Side effect: * * 1) flush the block * 2) Increment active_blk_num * * Note:We DONT refresh the timer on purpose. * Because almost always the next block will be opened. */ static void prb_close_block(struct tpacket_kbdq_core *pkc1, struct tpacket_block_desc *pbd1, struct packet_sock *po, unsigned int stat) { __u32 status = TP_STATUS_USER | stat; struct tpacket3_hdr *last_pkt; struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; struct sock *sk = &po->sk; if (atomic_read(&po->tp_drops)) status |= TP_STATUS_LOSING; last_pkt = (struct tpacket3_hdr *)pkc1->prev; last_pkt->tp_next_offset = 0; /* Get the ts of the last pkt */ if (BLOCK_NUM_PKTS(pbd1)) { h1->ts_last_pkt.ts_sec = last_pkt->tp_sec; h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec; } else { /* Ok, we tmo'd - so get the current time. * * It shouldn't really happen as we don't close empty * blocks. See prb_retire_rx_blk_timer_expired(). */ struct timespec64 ts; ktime_get_real_ts64(&ts); h1->ts_last_pkt.ts_sec = ts.tv_sec; h1->ts_last_pkt.ts_nsec = ts.tv_nsec; } smp_wmb(); /* Flush the block */ prb_flush_block(pkc1, pbd1, status); sk->sk_data_ready(sk); pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1); } static void prb_thaw_queue(struct tpacket_kbdq_core *pkc) { pkc->reset_pending_on_curr_blk = 0; } /* * Side effect of opening a block: * * 1) prb_queue is thawed. * 2) retire_blk_timer is refreshed. 
* */ static void prb_open_block(struct tpacket_kbdq_core *pkc1, struct tpacket_block_desc *pbd1) { struct timespec64 ts; struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; smp_rmb(); /* We could have just memset this but we will lose the * flexibility of making the priv area sticky */ BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; BLOCK_NUM_PKTS(pbd1) = 0; BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); ktime_get_real_ts64(&ts); h1->ts_first_pkt.ts_sec = ts.tv_sec; h1->ts_first_pkt.ts_nsec = ts.tv_nsec; pkc1->pkblk_start = (char *)pbd1; pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; pbd1->version = pkc1->version; pkc1->prev = pkc1->nxt_offset; pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; prb_thaw_queue(pkc1); _prb_refresh_rx_retire_blk_timer(pkc1); smp_wmb(); } /* * Queue freeze logic: * 1) Assume tp_block_nr = 8 blocks. * 2) At time 't0', user opens Rx ring. * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7 * 4) user-space is either sleeping or processing block '0'. * 5) tpacket_rcv is currently filling block '7', since there is no space left, * it will close block-7,loop around and try to fill block '0'. * call-flow: * __packet_lookup_frame_in_block * prb_retire_current_block() * prb_dispatch_next_block() * |->(BLOCK_STATUS == USER) evaluates to true * 5.1) Since block-0 is currently in-use, we just freeze the queue. * 6) Now there are two cases: * 6.1) Link goes idle right after the queue is frozen. * But remember, the last open_block() refreshed the timer. * When this timer expires,it will refresh itself so that we can * re-open block-0 in near future. * 6.2) Link is busy and keeps on receiving packets. This is a simple * case and __packet_lookup_frame_in_block will check if block-0 * is free and can now be re-used. */ static void prb_freeze_queue(struct tpacket_kbdq_core *pkc, struct packet_sock *po) { pkc->reset_pending_on_curr_blk = 1; po->stats.stats3.tp_freeze_q_cnt++; } #define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT)) /* * If the next block is free then we will dispatch it * and return a good offset. * Else, we will freeze the queue. * So, caller must check the return value. */ static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc, struct packet_sock *po) { struct tpacket_block_desc *pbd; smp_rmb(); /* 1. Get current block num */ pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); /* 2. If this block is currently in_use then freeze the queue */ if (TP_STATUS_USER & BLOCK_STATUS(pbd)) { prb_freeze_queue(pkc, po); return NULL; } /* * 3. * open this block and return the offset where the first packet * needs to get stored. */ prb_open_block(pkc, pbd); return (void *)pkc->nxt_offset; } static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, struct packet_sock *po, unsigned int status) { struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); /* retire/close the current block */ if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) { /* * Plug the case where copy_bits() is in progress on * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't * have space to copy the pkt in the current block and * called prb_retire_current_block() * * We don't need to worry about the TMO case because * the timer-handler already handled this case. */ if (!(status & TP_STATUS_BLK_TMO)) { /* Waiting for skb_copy_bits to finish... 
*/ write_lock(&pkc->blk_fill_in_prog_lock); write_unlock(&pkc->blk_fill_in_prog_lock); } prb_close_block(pkc, pbd, po, status); return; } } static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd) { return TP_STATUS_USER & BLOCK_STATUS(pbd); } static int prb_queue_frozen(struct tpacket_kbdq_core *pkc) { return pkc->reset_pending_on_curr_blk; } static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) __releases(&pkc->blk_fill_in_prog_lock) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); read_unlock(&pkc->blk_fill_in_prog_lock); } static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb); } static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { ppd->hv1.tp_rxhash = 0; } static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { struct packet_sock *po = container_of(pkc, struct packet_sock, rx_ring.prb_bdqc); if (skb_vlan_tag_present(pkc->skb)) { ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb); ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else if (unlikely(po->sk.sk_type == SOCK_DGRAM && eth_type_vlan(pkc->skb->protocol))) { ppd->hv1.tp_vlan_tci = vlan_get_tci(pkc->skb, pkc->skb->dev); ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->protocol); ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { ppd->hv1.tp_vlan_tci = 0; ppd->hv1.tp_vlan_tpid = 0; ppd->tp_status = TP_STATUS_AVAILABLE; } } static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { ppd->hv1.tp_padding = 0; prb_fill_vlan_info(pkc, ppd); if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH) prb_fill_rxhash(pkc, ppd); else prb_clear_rxhash(pkc, ppd); } static void prb_fill_curr_block(char *curr, struct tpacket_kbdq_core *pkc, struct tpacket_block_desc *pbd, unsigned int len) __acquires(&pkc->blk_fill_in_prog_lock) { struct tpacket3_hdr *ppd; ppd = (struct tpacket3_hdr *)curr; ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len); pkc->prev = curr; pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len); BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len); BLOCK_NUM_PKTS(pbd) += 1; read_lock(&pkc->blk_fill_in_prog_lock); prb_run_all_ft_ops(pkc, ppd); } /* Assumes caller has the sk->rx_queue.lock */ static void *__packet_lookup_frame_in_block(struct packet_sock *po, struct sk_buff *skb, unsigned int len ) { struct tpacket_kbdq_core *pkc; struct tpacket_block_desc *pbd; char *curr, *end; pkc = GET_PBDQC_FROM_RB(&po->rx_ring); pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); /* Queue is frozen when user space is lagging behind */ if (prb_queue_frozen(pkc)) { /* * Check if that last block which caused the queue to freeze, * is still in_use by user-space. */ if (prb_curr_blk_in_use(pbd)) { /* Can't record this packet */ return NULL; } else { /* * Ok, the block was released by user-space. * Now let's open that block. * opening a block also thaws the queue. * Thawing is a side effect. 
*/ prb_open_block(pkc, pbd); } } smp_mb(); curr = pkc->nxt_offset; pkc->skb = skb; end = (char *)pbd + pkc->kblk_size; /* first try the current block */ if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) { prb_fill_curr_block(curr, pkc, pbd, len); return (void *)curr; } /* Ok, close the current block */ prb_retire_current_block(pkc, po, 0); /* Now, try to dispatch the next block */ curr = (char *)prb_dispatch_next_block(pkc, po); if (curr) { pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); prb_fill_curr_block(curr, pkc, pbd, len); return (void *)curr; } /* * No free blocks are available.user_space hasn't caught up yet. * Queue was just frozen and now this packet will get dropped. */ return NULL; } static void *packet_current_rx_frame(struct packet_sock *po, struct sk_buff *skb, int status, unsigned int len) { char *curr = NULL; switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: curr = packet_lookup_frame(po, &po->rx_ring, po->rx_ring.head, status); return curr; case TPACKET_V3: return __packet_lookup_frame_in_block(po, skb, len); default: WARN(1, "TPACKET version not supported\n"); BUG(); return NULL; } } static void *prb_lookup_block(const struct packet_sock *po, const struct packet_ring_buffer *rb, unsigned int idx, int status) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx); if (status != BLOCK_STATUS(pbd)) return NULL; return pbd; } static int prb_previous_blk_num(struct packet_ring_buffer *rb) { unsigned int prev; if (rb->prb_bdqc.kactive_blk_num) prev = rb->prb_bdqc.kactive_blk_num-1; else prev = rb->prb_bdqc.knum_blocks-1; return prev; } /* Assumes caller has held the rx_queue.lock */ static void *__prb_previous_block(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { unsigned int previous = prb_previous_blk_num(rb); return prb_lookup_block(po, rb, previous, status); } static void *packet_previous_rx_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { if (po->tp_version <= TPACKET_V2) return packet_previous_frame(po, rb, status); return __prb_previous_block(po, rb, status); } static void packet_increment_rx_head(struct packet_sock *po, struct packet_ring_buffer *rb) { switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: return packet_increment_head(rb); case TPACKET_V3: default: WARN(1, "TPACKET version not supported.\n"); BUG(); return; } } static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max; return packet_lookup_frame(po, rb, previous, status); } static void packet_increment_head(struct packet_ring_buffer *buff) { buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; } static void packet_inc_pending(struct packet_ring_buffer *rb) { this_cpu_inc(*rb->pending_refcnt); } static void packet_dec_pending(struct packet_ring_buffer *rb) { this_cpu_dec(*rb->pending_refcnt); } static unsigned int packet_read_pending(const struct packet_ring_buffer *rb) { unsigned int refcnt = 0; int cpu; /* We don't use pending refcount in rx_ring. 
*/ if (rb->pending_refcnt == NULL) return 0; for_each_possible_cpu(cpu) refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu); return refcnt; } static int packet_alloc_pending(struct packet_sock *po) { po->rx_ring.pending_refcnt = NULL; po->tx_ring.pending_refcnt = alloc_percpu(unsigned int); if (unlikely(po->tx_ring.pending_refcnt == NULL)) return -ENOBUFS; return 0; } static void packet_free_pending(struct packet_sock *po) { free_percpu(po->tx_ring.pending_refcnt); } #define ROOM_POW_OFF 2 #define ROOM_NONE 0x0 #define ROOM_LOW 0x1 #define ROOM_NORMAL 0x2 static bool __tpacket_has_room(const struct packet_sock *po, int pow_off) { int idx, len; len = READ_ONCE(po->rx_ring.frame_max) + 1; idx = READ_ONCE(po->rx_ring.head); if (pow_off) idx += len >> pow_off; if (idx >= len) idx -= len; return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL); } static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off) { int idx, len; len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks); idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num); if (pow_off) idx += len >> pow_off; if (idx >= len) idx -= len; return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL); } static int __packet_rcv_has_room(const struct packet_sock *po, const struct sk_buff *skb) { const struct sock *sk = &po->sk; int ret = ROOM_NONE; if (po->prot_hook.func != tpacket_rcv) { int rcvbuf = READ_ONCE(sk->sk_rcvbuf); int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc) - (skb ? skb->truesize : 0); if (avail > (rcvbuf >> ROOM_POW_OFF)) return ROOM_NORMAL; else if (avail > 0) return ROOM_LOW; else return ROOM_NONE; } if (po->tp_version == TPACKET_V3) { if (__tpacket_v3_has_room(po, ROOM_POW_OFF)) ret = ROOM_NORMAL; else if (__tpacket_v3_has_room(po, 0)) ret = ROOM_LOW; } else { if (__tpacket_has_room(po, ROOM_POW_OFF)) ret = ROOM_NORMAL; else if (__tpacket_has_room(po, 0)) ret = ROOM_LOW; } return ret; } static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) { bool pressure; int ret; ret = __packet_rcv_has_room(po, skb); pressure = ret != ROOM_NORMAL; if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) != pressure) packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, pressure); return ret; } static void packet_rcv_try_clear_pressure(struct packet_sock *po) { if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, false); } static void packet_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_error_queue); WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(refcount_read(&sk->sk_wmem_alloc)); if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Attempt to release alive packet socket: %p\n", sk); return; } } static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb) { u32 *history = po->rollover->history; u32 victim, rxhash; int i, count = 0; rxhash = skb_get_hash(skb); for (i = 0; i < ROLLOVER_HLEN; i++) if (READ_ONCE(history[i]) == rxhash) count++; victim = get_random_u32_below(ROLLOVER_HLEN); /* Avoid dirtying the cache line if possible */ if (READ_ONCE(history[victim]) != rxhash) WRITE_ONCE(history[victim], rxhash); return count > (ROLLOVER_HLEN >> 1); } static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { return reciprocal_scale(__skb_get_hash_symmetric(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { unsigned int val = atomic_inc_return(&f->rr_cur); return val % num; } static unsigned 
int fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { return smp_processor_id() % num; } static unsigned int fanout_demux_rnd(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { return get_random_u32_below(num); } static unsigned int fanout_demux_rollover(struct packet_fanout *f, struct sk_buff *skb, unsigned int idx, bool try_self, unsigned int num) { struct packet_sock *po, *po_next, *po_skip = NULL; unsigned int i, j, room = ROOM_NONE; po = pkt_sk(rcu_dereference(f->arr[idx])); if (try_self) { room = packet_rcv_has_room(po, skb); if (room == ROOM_NORMAL || (room == ROOM_LOW && !fanout_flow_is_huge(po, skb))) return idx; po_skip = po; } i = j = min_t(int, po->rollover->sock, num - 1); do { po_next = pkt_sk(rcu_dereference(f->arr[i])); if (po_next != po_skip && !packet_sock_flag(po_next, PACKET_SOCK_PRESSURE) && packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { if (i != j) po->rollover->sock = i; atomic_long_inc(&po->rollover->num); if (room == ROOM_LOW) atomic_long_inc(&po->rollover->num_huge); return i; } if (++i == num) i = 0; } while (i != j); atomic_long_inc(&po->rollover->num_failed); return idx; } static unsigned int fanout_demux_qm(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { return skb_get_queue_mapping(skb) % num; } static unsigned int fanout_demux_bpf(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { struct bpf_prog *prog; unsigned int ret = 0; rcu_read_lock(); prog = rcu_dereference(f->bpf_prog); if (prog) ret = bpf_prog_run_clear_cb(prog, skb) % num; rcu_read_unlock(); return ret; } static bool fanout_has_flag(struct packet_fanout *f, u16 flag) { return f->flags & (flag >> 8); } static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct packet_fanout *f = pt->af_packet_priv; unsigned int num = READ_ONCE(f->num_members); struct net *net = read_pnet(&f->net); struct packet_sock *po; unsigned int idx; if (!net_eq(dev_net(dev), net) || !num) { kfree_skb(skb); return 0; } if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET); if (!skb) return 0; } switch (f->type) { case PACKET_FANOUT_HASH: default: idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: idx = fanout_demux_lb(f, skb, num); break; case PACKET_FANOUT_CPU: idx = fanout_demux_cpu(f, skb, num); break; case PACKET_FANOUT_RND: idx = fanout_demux_rnd(f, skb, num); break; case PACKET_FANOUT_QM: idx = fanout_demux_qm(f, skb, num); break; case PACKET_FANOUT_ROLLOVER: idx = fanout_demux_rollover(f, skb, 0, false, num); break; case PACKET_FANOUT_CBPF: case PACKET_FANOUT_EBPF: idx = fanout_demux_bpf(f, skb, num); break; } if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER)) idx = fanout_demux_rollover(f, skb, idx, true, num); po = pkt_sk(rcu_dereference(f->arr[idx])); return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } DEFINE_MUTEX(fanout_mutex); EXPORT_SYMBOL_GPL(fanout_mutex); static LIST_HEAD(fanout_list); static u16 fanout_next_id; static void __fanout_link(struct sock *sk, struct packet_sock *po) { struct packet_fanout *f = po->fanout; spin_lock(&f->lock); rcu_assign_pointer(f->arr[f->num_members], sk); smp_wmb(); f->num_members++; if (f->num_members == 1) dev_add_pack(&f->prot_hook); spin_unlock(&f->lock); } static void __fanout_unlink(struct sock *sk, struct packet_sock *po) { struct packet_fanout *f = po->fanout; int i; spin_lock(&f->lock); for (i = 0; i < f->num_members; i++) { if 
(rcu_dereference_protected(f->arr[i], lockdep_is_held(&f->lock)) == sk) break; } BUG_ON(i >= f->num_members); rcu_assign_pointer(f->arr[i], rcu_dereference_protected(f->arr[f->num_members - 1], lockdep_is_held(&f->lock))); f->num_members--; if (f->num_members == 0) __dev_remove_pack(&f->prot_hook); spin_unlock(&f->lock); } static bool match_fanout_group(struct packet_type *ptype, struct sock *sk) { if (sk->sk_family != PF_PACKET) return false; return ptype->af_packet_priv == pkt_sk(sk)->fanout; } static void fanout_init_data(struct packet_fanout *f) { switch (f->type) { case PACKET_FANOUT_LB: atomic_set(&f->rr_cur, 0); break; case PACKET_FANOUT_CBPF: case PACKET_FANOUT_EBPF: RCU_INIT_POINTER(f->bpf_prog, NULL); break; } } static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new) { struct bpf_prog *old; spin_lock(&f->lock); old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock)); rcu_assign_pointer(f->bpf_prog, new); spin_unlock(&f->lock); if (old) { synchronize_net(); bpf_prog_destroy(old); } } static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data, unsigned int len) { struct bpf_prog *new; struct sock_fprog fprog; int ret; if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) return -EPERM; ret = copy_bpf_fprog_from_user(&fprog, data, len); if (ret) return ret; ret = bpf_prog_create_from_user(&new, &fprog, NULL, false); if (ret) return ret; __fanout_set_data_bpf(po->fanout, new); return 0; } static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data, unsigned int len) { struct bpf_prog *new; u32 fd; if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) return -EPERM; if (len != sizeof(fd)) return -EINVAL; if (copy_from_sockptr(&fd, data, len)) return -EFAULT; new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); if (IS_ERR(new)) return PTR_ERR(new); __fanout_set_data_bpf(po->fanout, new); return 0; } static int fanout_set_data(struct packet_sock *po, sockptr_t data, unsigned int len) { switch (po->fanout->type) { case PACKET_FANOUT_CBPF: return fanout_set_data_cbpf(po, data, len); case PACKET_FANOUT_EBPF: return fanout_set_data_ebpf(po, data, len); default: return -EINVAL; } } static void fanout_release_data(struct packet_fanout *f) { switch (f->type) { case PACKET_FANOUT_CBPF: case PACKET_FANOUT_EBPF: __fanout_set_data_bpf(f, NULL); } } static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id) { struct packet_fanout *f; list_for_each_entry(f, &fanout_list, list) { if (f->id == candidate_id && read_pnet(&f->net) == sock_net(sk)) { return false; } } return true; } static bool fanout_find_new_id(struct sock *sk, u16 *new_id) { u16 id = fanout_next_id; do { if (__fanout_id_is_free(sk, id)) { *new_id = id; fanout_next_id = id + 1; return true; } id++; } while (id != fanout_next_id); return false; } static int fanout_add(struct sock *sk, struct fanout_args *args) { struct packet_rollover *rollover = NULL; struct packet_sock *po = pkt_sk(sk); u16 type_flags = args->type_flags; struct packet_fanout *f, *match; u8 type = type_flags & 0xff; u8 flags = type_flags >> 8; u16 id = args->id; int err; switch (type) { case PACKET_FANOUT_ROLLOVER: if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER) return -EINVAL; break; case PACKET_FANOUT_HASH: case PACKET_FANOUT_LB: case PACKET_FANOUT_CPU: case PACKET_FANOUT_RND: case PACKET_FANOUT_QM: case PACKET_FANOUT_CBPF: case PACKET_FANOUT_EBPF: break; default: return -EINVAL; } mutex_lock(&fanout_mutex); err = -EALREADY; if (po->fanout) goto out; if (type == PACKET_FANOUT_ROLLOVER || (type_flags & 
PACKET_FANOUT_FLAG_ROLLOVER)) { err = -ENOMEM; rollover = kzalloc(sizeof(*rollover), GFP_KERNEL); if (!rollover) goto out; atomic_long_set(&rollover->num, 0); atomic_long_set(&rollover->num_huge, 0); atomic_long_set(&rollover->num_failed, 0); } if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) { if (id != 0) { err = -EINVAL; goto out; } if (!fanout_find_new_id(sk, &id)) { err = -ENOMEM; goto out; } /* ephemeral flag for the first socket in the group: drop it */ flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8); } match = NULL; list_for_each_entry(f, &fanout_list, list) { if (f->id == id && read_pnet(&f->net) == sock_net(sk)) { match = f; break; } } err = -EINVAL; if (match) { if (match->flags != flags) goto out; if (args->max_num_members && args->max_num_members != match->max_num_members) goto out; } else { if (args->max_num_members > PACKET_FANOUT_MAX) goto out; if (!args->max_num_members) /* legacy PACKET_FANOUT_MAX */ args->max_num_members = 256; err = -ENOMEM; match = kvzalloc(struct_size(match, arr, args->max_num_members), GFP_KERNEL); if (!match) goto out; write_pnet(&match->net, sock_net(sk)); match->id = id; match->type = type; match->flags = flags; INIT_LIST_HEAD(&match->list); spin_lock_init(&match->lock); refcount_set(&match->sk_ref, 0); fanout_init_data(match); match->prot_hook.type = po->prot_hook.type; match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; match->max_num_members = args->max_num_members; match->prot_hook.ignore_outgoing = type_flags & PACKET_FANOUT_FLAG_IGNORE_OUTGOING; list_add(&match->list, &fanout_list); } err = -EINVAL; spin_lock(&po->bind_lock); if (po->num && match->type == type && match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { err = -ENOSPC; if (refcount_read(&match->sk_ref) < match->max_num_members) { /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ WRITE_ONCE(po->fanout, match); po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { __dev_remove_pack(&po->prot_hook); __fanout_link(sk, po); } err = 0; } } spin_unlock(&po->bind_lock); if (err && !refcount_read(&match->sk_ref)) { list_del(&match->list); kvfree(match); } out: kfree(rollover); mutex_unlock(&fanout_mutex); return err; } /* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout. * It is the responsibility of the caller to call fanout_release_data() and * free the returned packet_fanout (after synchronize_net()) */ static struct packet_fanout *fanout_release(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f; mutex_lock(&fanout_mutex); f = po->fanout; if (f) { po->fanout = NULL; if (refcount_dec_and_test(&f->sk_ref)) list_del(&f->list); else f = NULL; } mutex_unlock(&fanout_mutex); return f; } static bool packet_extra_vlan_len_allowed(const struct net_device *dev, struct sk_buff *skb) { /* Earlier code assumed this would be a VLAN pkt, double-check * this now that we have the actual packet in hand. We can only * do this check on Ethernet devices. 
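* The extra VLAN_HLEN allowance is only granted when the Ethertype really is 802.1Q.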
*/ if (unlikely(dev->type != ARPHRD_ETHER)) return false; skb_reset_mac_header(skb); return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)); } static const struct proto_ops packet_ops; static const struct proto_ops packet_ops_spkt; static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct sockaddr_pkt *spkt; /* * When we registered the protocol we saved the socket in the data * field for just this event. */ sk = pt->af_packet_priv; /* * Yank back the headers [hope the device set this * right or kerboom...] * * Incoming packets have ll header pulled, * push it back. * * For outgoing ones skb->data == skb_mac_header(skb) * so that this procedure is noop. */ if (skb->pkt_type == PACKET_LOOPBACK) goto out; if (!net_eq(dev_net(dev), sock_net(sk))) goto out; skb = skb_share_check(skb, GFP_ATOMIC); if (skb == NULL) goto oom; /* drop any routing info */ skb_dst_drop(skb); /* drop conntrack reference */ nf_reset_ct(skb); spkt = &PACKET_SKB_CB(skb)->sa.pkt; skb_push(skb, skb->data - skb_mac_header(skb)); /* * The SOCK_PACKET socket receives _all_ frames. */ spkt->spkt_family = dev->type; strscpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device)); spkt->spkt_protocol = skb->protocol; /* * Charge the memory to the socket. This is done specifically * to prevent sockets using all the memory up. */ if (sock_queue_rcv_skb(sk, skb) == 0) return 0; out: kfree_skb(skb); oom: return 0; } static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) { int depth; if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && sock->type == SOCK_RAW) { skb_reset_mac_header(skb); skb->protocol = dev_parse_header_protocol(skb); } /* Move network header to the right position for VLAN tagged packets */ if (likely(skb->dev->type == ARPHRD_ETHER) && eth_type_vlan(skb->protocol) && vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) skb_set_network_header(skb, depth); skb_probe_transport_header(skb); } /* * Output a raw packet to a device layer. This bypasses all the other * protocol layers and you must therefore supply it with a complete frame */ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); struct sk_buff *skb = NULL; struct net_device *dev; struct sockcm_cookie sockc; __be16 proto = 0; int err; int extra_len = 0; /* * Get and verify the address. */ if (saddr) { if (msg->msg_namelen < sizeof(struct sockaddr)) return -EINVAL; if (msg->msg_namelen == sizeof(struct sockaddr_pkt)) proto = saddr->spkt_protocol; } else return -ENOTCONN; /* SOCK_PACKET must be sent giving an address */ /* * Find the device first to size check it */ saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0; retry: rcu_read_lock(); dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device); err = -ENODEV; if (dev == NULL) goto out_unlock; err = -ENETDOWN; if (!(dev->flags & IFF_UP)) goto out_unlock; /* * You may not queue a frame bigger than the mtu. This is the lowest level * raw protocol and you must do your own fragmentation at this level. 
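* The limit checked below is dev->mtu plus the hard header, a possible VLAN tag, and four
* extra bytes when SOCK_NOFCS means the caller supplies its own CRC.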
*/ if (unlikely(sock_flag(sk, SOCK_NOFCS))) { if (!netif_supports_nofcs(dev)) { err = -EPROTONOSUPPORT; goto out_unlock; } extra_len = 4; /* We're doing our own CRC */ } err = -EMSGSIZE; if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len) goto out_unlock; if (!skb) { size_t reserved = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0; rcu_read_unlock(); skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL); if (skb == NULL) return -ENOBUFS; /* FIXME: Save some space for broken drivers that write a hard * header at transmission time by themselves. PPP is the notable * one here. This should really be fixed at the driver level. */ skb_reserve(skb, reserved); skb_reset_network_header(skb); /* Try to align data part correctly */ if (hhlen) { skb->data -= hhlen; skb->tail -= hhlen; if (len < hhlen) skb_reset_network_header(skb); } err = memcpy_from_msg(skb_put(skb, len), msg, len); if (err) goto out_free; goto retry; } if (!dev_validate_header(dev, skb->data, len) || !skb->len) { err = -EINVAL; goto out_unlock; } if (len > (dev->mtu + dev->hard_header_len + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { err = -EMSGSIZE; goto out_unlock; } sockcm_init(&sockc, sk); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) goto out_unlock; } skb->protocol = proto; skb->dev = dev; skb->priority = sockc.priority; skb->mark = sockc.mark; skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); skb_setup_tx_timestamp(skb, &sockc); if (unlikely(extra_len == 4)) skb->no_fcs = 1; packet_parse_headers(skb, sock); dev_queue_xmit(skb); rcu_read_unlock(); return len; out_unlock: rcu_read_unlock(); out_free: kfree_skb(skb); return err; } static unsigned int run_filter(struct sk_buff *skb, const struct sock *sk, unsigned int res) { struct sk_filter *filter; rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter != NULL) res = bpf_prog_run_clear_cb(filter->prog, skb); rcu_read_unlock(); return res; } static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb, size_t *len, int vnet_hdr_sz) { struct virtio_net_hdr_mrg_rxbuf vnet_hdr = { .num_buffers = 0 }; if (*len < vnet_hdr_sz) return -EINVAL; *len -= vnet_hdr_sz; if (virtio_net_hdr_from_skb(skb, (struct virtio_net_hdr *)&vnet_hdr, vio_le(), true, 0)) return -EINVAL; return memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_sz); } /* * This function makes lazy skb cloning in hope that most of packets * are discarded by BPF. * * Note tricky part: we DO mangle shared skb! skb->data, skb->len * and skb->cb are mangled. It works because (and until) packets * falling here are owned by current CPU. Output packets are cloned * by dev_queue_xmit_nit(), input packets are processed by net_bh * sequentially, so that if we return skb to original state on exit, * we will not harm anyone. */ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { enum skb_drop_reason drop_reason = SKB_CONSUMED; struct sock *sk = NULL; struct sockaddr_ll *sll; struct packet_sock *po; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; sk = pt->af_packet_priv; po = pkt_sk(sk); if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; skb->dev = dev; if (dev_has_header(dev)) { /* The device has an explicit notion of ll header, * exported to higher levels. 
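* In that case a non-SOCK_DGRAM (i.e. SOCK_RAW) socket gets the link-layer header pushed
* back in front of the data below.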
* * Otherwise, the device hides details of its frame * structure, so that corresponding packet head is * never delivered to user. */ if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb_network_offset(skb)); } } snaplen = skb_frags_readable(skb) ? skb->len : skb_headlen(skb); res = run_filter(skb, sk, snaplen); if (!res) goto drop_n_restore; if (snaplen > res) snaplen = res; if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto drop_n_acct; if (skb_shared(skb)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); if (nskb == NULL) goto drop_n_acct; if (skb_head != skb->data) { skb->data = skb_head; skb->len = skb_len; } consume_skb(skb); skb = nskb; } sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8); sll = &PACKET_SKB_CB(skb)->sa.ll; sll->sll_hatype = dev->type; sll->sll_pkttype = skb->pkt_type; if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; sll->sll_halen = dev_parse_header(skb, sll->sll_addr); /* sll->sll_family and sll->sll_protocol are set in packet_recvmsg(). * Use their space for storing the original skb length. */ PACKET_SKB_CB(skb)->sa.origlen = skb->len; if (pskb_trim(skb, snaplen)) goto drop_n_acct; skb_set_owner_r(skb, sk); skb->dev = NULL; skb_dst_drop(skb); /* drop conntrack reference */ nf_reset_ct(skb); spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_packets++; sock_skb_set_dropcount(sk, skb); skb_clear_delivery_time(skb); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); return 0; drop_n_acct: atomic_inc(&po->tp_drops); atomic_inc(&sk->sk_drops); drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR; drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { skb->data = skb_head; skb->len = skb_len; } drop: sk_skb_reason_drop(sk, skb, drop_reason); return 0; } static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { enum skb_drop_reason drop_reason = SKB_CONSUMED; struct sock *sk = NULL; struct packet_sock *po; struct sockaddr_ll *sll; union tpacket_uhdr h; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; unsigned long status = TP_STATUS_USER; unsigned short macoff, hdrlen; unsigned int netoff; struct sk_buff *copy_skb = NULL; struct timespec64 ts; __u32 ts_status; unsigned int slot_id = 0; int vnet_hdr_sz = 0; /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. * We may add members to them until current aligned size without forcing * userspace to call getsockopt(..., PACKET_HDRLEN, ...). */ BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32); BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48); if (skb->pkt_type == PACKET_LOOPBACK) goto drop; sk = pt->af_packet_priv; po = pkt_sk(sk); if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; if (dev_has_header(dev)) { if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb_network_offset(skb)); } } snaplen = skb_frags_readable(skb) ? 
skb->len : skb_headlen(skb); res = run_filter(skb, sk, snaplen); if (!res) goto drop_n_restore; /* If we are flooded, just give up */ if (__packet_rcv_has_room(po, skb) == ROOM_NONE) { atomic_inc(&po->tp_drops); goto drop_n_restore; } if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && skb_csum_unnecessary(skb)) status |= TP_STATUS_CSUM_VALID; if (skb_is_gso(skb) && skb_is_gso_tcp(skb)) status |= TP_STATUS_GSO_TCP; if (snaplen > res) snaplen = res; if (sk->sk_type == SOCK_DGRAM) { macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 + po->tp_reserve; } else { unsigned int maclen = skb_network_offset(skb); netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 16 : maclen)) + po->tp_reserve; vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz); if (vnet_hdr_sz) netoff += vnet_hdr_sz; macoff = netoff - maclen; } if (netoff > USHRT_MAX) { atomic_inc(&po->tp_drops); goto drop_n_restore; } if (po->tp_version <= TPACKET_V2) { if (macoff + snaplen > po->rx_ring.frame_size) { if (READ_ONCE(po->copy_thresh) && atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { if (skb_shared(skb)) { copy_skb = skb_clone(skb, GFP_ATOMIC); } else { copy_skb = skb_get(skb); skb_head = skb->data; } if (copy_skb) { memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); skb_set_owner_r(copy_skb, sk); } } snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) { snaplen = 0; vnet_hdr_sz = 0; } } } else if (unlikely(macoff + snaplen > GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) { u32 nval; nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff; pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n", snaplen, nval, macoff); snaplen = nval; if (unlikely((int)snaplen < 0)) { snaplen = 0; macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len; vnet_hdr_sz = 0; } } spin_lock(&sk->sk_receive_queue.lock); h.raw = packet_current_rx_frame(po, skb, TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) goto drop_n_account; if (po->tp_version <= TPACKET_V2) { slot_id = po->rx_ring.head; if (test_bit(slot_id, po->rx_ring.rx_owner_map)) goto drop_n_account; __set_bit(slot_id, po->rx_ring.rx_owner_map); } if (vnet_hdr_sz && virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), vio_le(), true, 0)) { if (po->tp_version == TPACKET_V3) prb_clear_blk_fill_status(&po->rx_ring); goto drop_n_account; } if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); /* * LOSING will be reported till you read the stats, * because it's COR - Clear On Read. * Anyways, moving it for V1/V2 only as V3 doesn't need this * at packet level. */ if (atomic_read(&po->tp_drops)) status |= TP_STATUS_LOSING; } po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; skb_clear_delivery_time(copy_skb); __skb_queue_tail(&sk->sk_receive_queue, copy_skb); } spin_unlock(&sk->sk_receive_queue.lock); skb_copy_bits(skb, 0, h.raw + macoff, snaplen); /* Always timestamp; prefer an existing software timestamp taken * closer to the time of capture. 
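* If the skb carries no usable timestamp, ktime_get_real_ts64() fills one in below.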
*/ ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp) | SOF_TIMESTAMPING_SOFTWARE); if (!ts_status) ktime_get_real_ts64(&ts); status |= ts_status; switch (po->tp_version) { case TPACKET_V1: h.h1->tp_len = skb->len; h.h1->tp_snaplen = snaplen; h.h1->tp_mac = macoff; h.h1->tp_net = netoff; h.h1->tp_sec = ts.tv_sec; h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; hdrlen = sizeof(*h.h1); break; case TPACKET_V2: h.h2->tp_len = skb->len; h.h2->tp_snaplen = snaplen; h.h2->tp_mac = macoff; h.h2->tp_net = netoff; h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; if (skb_vlan_tag_present(skb)) { h.h2->tp_vlan_tci = skb_vlan_tag_get(skb); h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else if (unlikely(sk->sk_type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) { h.h2->tp_vlan_tci = vlan_get_tci(skb, skb->dev); h.h2->tp_vlan_tpid = ntohs(skb->protocol); status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { h.h2->tp_vlan_tci = 0; h.h2->tp_vlan_tpid = 0; } memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding)); hdrlen = sizeof(*h.h2); break; case TPACKET_V3: /* tp_nxt_offset,vlan are already populated above. * So DONT clear those fields here */ h.h3->tp_status |= status; h.h3->tp_len = skb->len; h.h3->tp_snaplen = snaplen; h.h3->tp_mac = macoff; h.h3->tp_net = netoff; h.h3->tp_sec = ts.tv_sec; h.h3->tp_nsec = ts.tv_nsec; memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding)); hdrlen = sizeof(*h.h3); break; default: BUG(); } sll = h.raw + TPACKET_ALIGN(hdrlen); sll->sll_halen = dev_parse_header(skb, sll->sll_addr); sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; sll->sll_protocol = (sk->sk_type == SOCK_DGRAM) ? vlan_get_protocol_dgram(skb) : skb->protocol; sll->sll_pkttype = skb->pkt_type; if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; smp_mb(); #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 if (po->tp_version <= TPACKET_V2) { u8 *start, *end; end = (u8 *) PAGE_ALIGN((unsigned long) h.raw + macoff + snaplen); for (start = h.raw; start < end; start += PAGE_SIZE) flush_dcache_page(pgv_to_page(start)); } smp_wmb(); #endif if (po->tp_version <= TPACKET_V2) { spin_lock(&sk->sk_receive_queue.lock); __packet_set_status(po, h.raw, status); __clear_bit(slot_id, po->rx_ring.rx_owner_map); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); } else if (po->tp_version == TPACKET_V3) { prb_clear_blk_fill_status(&po->rx_ring); } drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { skb->data = skb_head; skb->len = skb_len; } drop: sk_skb_reason_drop(sk, skb, drop_reason); return 0; drop_n_account: spin_unlock(&sk->sk_receive_queue.lock); atomic_inc(&po->tp_drops); drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR; sk->sk_data_ready(sk); sk_skb_reason_drop(sk, copy_skb, drop_reason); goto drop_n_restore; } static void tpacket_destruct_skb(struct sk_buff *skb) { struct packet_sock *po = pkt_sk(skb->sk); if (likely(po->tx_ring.pg_vec)) { void *ph; __u32 ts; ph = skb_zcopy_get_nouarg(skb); packet_dec_pending(&po->tx_ring); ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); complete(&po->skb_completion); } sock_wfree(skb); } static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len) { if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 > 
__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len))) vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(), __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2); if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len) return -EINVAL; return 0; } static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len, struct virtio_net_hdr *vnet_hdr, int vnet_hdr_sz) { int ret; if (*len < vnet_hdr_sz) return -EINVAL; *len -= vnet_hdr_sz; if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter)) return -EFAULT; ret = __packet_snd_vnet_parse(vnet_hdr, *len); if (ret) return ret; /* move iter to point to the start of mac header */ if (vnet_hdr_sz != sizeof(struct virtio_net_hdr)) iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - sizeof(struct virtio_net_hdr)); return 0; } static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, void *data, int tp_len, __be16 proto, unsigned char *addr, int hlen, int copylen, const struct sockcm_cookie *sockc) { union tpacket_uhdr ph; int to_write, offset, len, nr_frags, len_max; struct socket *sock = po->sk.sk_socket; struct page *page; int err; ph.raw = frame; skb->protocol = proto; skb->dev = dev; skb->priority = sockc->priority; skb->mark = sockc->mark; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid); skb_setup_tx_timestamp(skb, sockc); skb_zcopy_set_nouarg(skb, ph.raw); skb_reserve(skb, hlen); skb_reset_network_header(skb); to_write = tp_len; if (sock->type == SOCK_DGRAM) { err = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, tp_len); if (unlikely(err < 0)) return -EINVAL; } else if (copylen) { int hdrlen = min_t(int, copylen, tp_len); skb_push(skb, dev->hard_header_len); skb_put(skb, copylen - dev->hard_header_len); err = skb_store_bits(skb, 0, data, hdrlen); if (unlikely(err)) return err; if (!dev_validate_header(dev, skb->data, hdrlen)) return -EINVAL; data += hdrlen; to_write -= hdrlen; } offset = offset_in_page(data); len_max = PAGE_SIZE - offset; len = ((to_write > len_max) ? len_max : to_write); skb->data_len = to_write; skb->len += to_write; skb->truesize += to_write; refcount_add(to_write, &po->sk.sk_wmem_alloc); while (likely(to_write)) { nr_frags = skb_shinfo(skb)->nr_frags; if (unlikely(nr_frags >= MAX_SKB_FRAGS)) { pr_err("Packet exceed the number of skb frags(%u)\n", (unsigned int)MAX_SKB_FRAGS); return -EFAULT; } page = pgv_to_page(data); data += len; flush_dcache_page(page); get_page(page); skb_fill_page_desc(skb, nr_frags, page, offset, len); to_write -= len; offset = 0; len_max = PAGE_SIZE; len = ((to_write > len_max) ? 
len_max : to_write); } packet_parse_headers(skb, sock); return tp_len; } static int tpacket_parse_header(struct packet_sock *po, void *frame, int size_max, void **data) { union tpacket_uhdr ph; int tp_len, off; ph.raw = frame; switch (po->tp_version) { case TPACKET_V3: if (ph.h3->tp_next_offset != 0) { pr_warn_once("variable sized slot not supported"); return -EINVAL; } tp_len = ph.h3->tp_len; break; case TPACKET_V2: tp_len = ph.h2->tp_len; break; default: tp_len = ph.h1->tp_len; break; } if (unlikely(tp_len > size_max)) { pr_err("packet size is too long (%d > %d)\n", tp_len, size_max); return -EMSGSIZE; } if (unlikely(packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF))) { int off_min, off_max; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); off_max = po->tx_ring.frame_size - tp_len; if (po->sk.sk_type == SOCK_DGRAM) { switch (po->tp_version) { case TPACKET_V3: off = ph.h3->tp_net; break; case TPACKET_V2: off = ph.h2->tp_net; break; default: off = ph.h1->tp_net; break; } } else { switch (po->tp_version) { case TPACKET_V3: off = ph.h3->tp_mac; break; case TPACKET_V2: off = ph.h2->tp_mac; break; default: off = ph.h1->tp_mac; break; } } if (unlikely((off < off_min) || (off_max < off))) return -EINVAL; } else { off = po->tp_hdrlen - sizeof(struct sockaddr_ll); } *data = frame + off; return tp_len; } static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb = NULL; struct net_device *dev; struct virtio_net_hdr *vnet_hdr = NULL; struct sockcm_cookie sockc; __be16 proto; int err, reserve = 0; void *ph; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz); unsigned char *addr = NULL; int tp_len, size_max; void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; int hlen, tlen, copylen = 0; long timeo = 0; mutex_lock(&po->pg_vec_lock); /* packet_sendmsg() check on tx_ring.pg_vec was lockless, * we need to confirm it under protection of pg_vec_lock. */ if (unlikely(!po->tx_ring.pg_vec)) { err = -EBUSY; goto out; } if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) goto out; if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); if (po->sk.sk_socket->type == SOCK_DGRAM) { if (dev && msg->msg_namelen < dev->addr_len + offsetof(struct sockaddr_ll, sll_addr)) goto out_put; addr = saddr->sll_addr; } } err = -ENXIO; if (unlikely(dev == NULL)) goto out; err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; sockcm_init(&sockc, &po->sk); if (msg->msg_controllen) { err = sock_cmsg_send(&po->sk, msg, &sockc); if (unlikely(err)) goto out_put; } if (po->sk.sk_socket->type == SOCK_RAW) reserve = dev->hard_header_len; size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz) size_max = dev->mtu + reserve + VLAN_HLEN; reinit_completion(&po->skb_completion); do { ph = packet_current_frame(po, &po->tx_ring, TP_STATUS_SEND_REQUEST); if (unlikely(ph == NULL)) { if (need_wait && skb) { timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT); timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo); if (timeo <= 0) { err = !timeo ? 
-ETIMEDOUT : -ERESTARTSYS; goto out_put; } } /* check for additional frames */ continue; } skb = NULL; tp_len = tpacket_parse_header(po, ph, size_max, &data); if (tp_len < 0) goto tpacket_error; status = TP_STATUS_SEND_REQUEST; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; if (vnet_hdr_sz) { vnet_hdr = data; data += vnet_hdr_sz; tp_len -= vnet_hdr_sz; if (tp_len < 0 || __packet_snd_vnet_parse(vnet_hdr, tp_len)) { tp_len = -EINVAL; goto tpacket_error; } copylen = __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len); } copylen = max_t(int, copylen, dev->hard_header_len); skb = sock_alloc_send_skb(&po->sk, hlen + tlen + sizeof(struct sockaddr_ll) + (copylen - dev->hard_header_len), !need_wait, &err); if (unlikely(skb == NULL)) { /* we assume the socket was initially writeable ... */ if (likely(len_sum > 0)) err = len_sum; goto out_status; } tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto, addr, hlen, copylen, &sockc); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && !vnet_hdr_sz && !packet_extra_vlan_len_allowed(dev, skb)) tp_len = -EMSGSIZE; if (unlikely(tp_len < 0)) { tpacket_error: if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) { __packet_set_status(po, ph, TP_STATUS_AVAILABLE); packet_increment_head(&po->tx_ring); kfree_skb(skb); continue; } else { status = TP_STATUS_WRONG_FORMAT; err = tp_len; goto out_status; } } if (vnet_hdr_sz) { if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { tp_len = -EINVAL; goto tpacket_error; } virtio_net_hdr_set_proto(skb, vnet_hdr); } skb->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); packet_inc_pending(&po->tx_ring); status = TP_STATUS_SEND_REQUEST; err = packet_xmit(po, skb); if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); if (err && __packet_get_status(po, ph) == TP_STATUS_AVAILABLE) { /* skb was destructed already */ skb = NULL; goto out_status; } /* * skb was dropped but not destructed yet; * let's treat it like congestion or err < 0 */ err = 0; } packet_increment_head(&po->tx_ring); len_sum += tp_len; } while (likely((ph != NULL) || /* Note: packet_read_pending() might be slow if we have * to call it as it's per_cpu variable, but in fast-path * we already short-circuit the loop with the first * condition, and luckily don't have to go that path * anyway. */ (need_wait && packet_read_pending(&po->tx_ring)))); err = len_sum; goto out_put; out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; } static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, size_t reserve, size_t len, size_t linear, int noblock, int *err) { struct sk_buff *skb; /* Under a page? Don't bother with paged skb. 
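* Otherwise split into a linear head plus paged data, growing the linear part so that the
* paged remainder fits into MAX_SKB_FRAGS pages of order PAGE_ALLOC_COSTLY_ORDER.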
*/ if (prepad + len < PAGE_SIZE || !linear) linear = len; if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER); skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, err, PAGE_ALLOC_COSTLY_ORDER); if (!skb) return NULL; skb_reserve(skb, reserve); skb_put(skb, linear); skb->data_len = len - linear; skb->len += len - linear; return skb; } static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); struct sk_buff *skb; struct net_device *dev; __be16 proto; unsigned char *addr = NULL; int err, reserve = 0; struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; struct packet_sock *po = pkt_sk(sk); int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz); int hlen, tlen, linear; int extra_len = 0; /* * Get and verify the address. */ if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) goto out; if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); if (sock->type == SOCK_DGRAM) { if (dev && msg->msg_namelen < dev->addr_len + offsetof(struct sockaddr_ll, sll_addr)) goto out_unlock; addr = saddr->sll_addr; } } err = -ENXIO; if (unlikely(dev == NULL)) goto out_unlock; err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; sockcm_init(&sockc, sk); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) goto out_unlock; } if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; if (vnet_hdr_sz) { err = packet_snd_vnet_parse(msg, &len, &vnet_hdr, vnet_hdr_sz); if (err) goto out_unlock; } if (unlikely(sock_flag(sk, SOCK_NOFCS))) { if (!netif_supports_nofcs(dev)) { err = -EPROTONOSUPPORT; goto out_unlock; } extra_len = 4; /* We're doing our own CRC */ } err = -EMSGSIZE; if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) goto out_unlock; err = -ENOBUFS; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len); linear = max(linear, min_t(int, len, dev->hard_header_len)); skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear, msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out_unlock; skb_reset_network_header(skb); err = -EINVAL; if (sock->type == SOCK_DGRAM) { offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (unlikely(offset < 0)) goto out_free; } else if (reserve) { skb_reserve(skb, -reserve); if (len < reserve + sizeof(struct ipv6hdr) && dev->min_header_len != dev->hard_header_len) skb_reset_network_header(skb); } /* Returns -EFAULT on error */ err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len); if (err) goto out_free; if ((sock->type == SOCK_RAW && !dev_validate_header(dev, skb->data, len)) || !skb->len) { err = -EINVAL; goto out_free; } skb_setup_tx_timestamp(skb, &sockc); if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { err = -EMSGSIZE; goto out_free; } skb->protocol = proto; skb->dev = dev; skb->priority = sockc.priority; skb->mark = sockc.mark; skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); if (unlikely(extra_len == 4)) skb->no_fcs = 1; packet_parse_headers(skb, 
sock); if (vnet_hdr_sz) { err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); if (err) goto out_free; len += vnet_hdr_sz; virtio_net_hdr_set_proto(skb, &vnet_hdr); } err = packet_xmit(po, skb); if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); if (err) goto out_unlock; } dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: dev_put(dev); out: return err; } static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. * tpacket_snd() will redo the check safely. */ if (data_race(po->tx_ring.pg_vec)) return tpacket_snd(po, msg); return packet_snd(sock, msg, len); } /* * Close a PACKET socket. This is fairly simple. We immediately go * to 'closed' state and remove our protocol entry in the device list. */ static int packet_release(struct socket *sock) { struct sock *sk = sock->sk; struct packet_sock *po; struct packet_fanout *f; struct net *net; union tpacket_req_u req_u; if (!sk) return 0; net = sock_net(sk); po = pkt_sk(sk); mutex_lock(&net->packet.sklist_lock); sk_del_node_init_rcu(sk); mutex_unlock(&net->packet.sklist_lock); sock_prot_inuse_add(net, sk->sk_prot, -1); spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); packet_cached_dev_reset(po); if (po->prot_hook.dev) { netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); packet_flush_mclist(sk); lock_sock(sk); if (po->rx_ring.pg_vec) { memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 0); } if (po->tx_ring.pg_vec) { memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 1); } release_sock(sk); f = fanout_release(sk); synchronize_net(); kfree(po->rollover); if (f) { fanout_release_data(f); kvfree(f); } /* * Now the socket is dead. No more input will appear. */ sock_orphan(sk); sock->sk = NULL; /* Purge queues */ skb_queue_purge(&sk->sk_receive_queue); packet_free_pending(po); sock_put(sk); return 0; } /* * Attach a packet hook. */ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, __be16 proto) { struct packet_sock *po = pkt_sk(sk); struct net_device *dev = NULL; bool unlisted = false; bool need_rehook; int ret = 0; lock_sock(sk); spin_lock(&po->bind_lock); if (!proto) proto = po->num; rcu_read_lock(); if (po->fanout) { ret = -EINVAL; goto out_unlock; } if (name) { dev = dev_get_by_name_rcu(sock_net(sk), name); if (!dev) { ret = -ENODEV; goto out_unlock; } } else if (ifindex) { dev = dev_get_by_index_rcu(sock_net(sk), ifindex); if (!dev) { ret = -ENODEV; goto out_unlock; } } need_rehook = po->prot_hook.type != proto || po->prot_hook.dev != dev; if (need_rehook) { dev_hold(dev); if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { rcu_read_unlock(); /* prevents packet_notifier() from calling * register_prot_hook() */ WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, true); rcu_read_lock(); if (dev) unlisted = !dev_get_by_index_rcu(sock_net(sk), dev->ifindex); } BUG_ON(packet_sock_flag(po, PACKET_SOCK_RUNNING)); WRITE_ONCE(po->num, proto); po->prot_hook.type = proto; netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); if (unlikely(unlisted)) { po->prot_hook.dev = NULL; WRITE_ONCE(po->ifindex, -1); packet_cached_dev_reset(po); } else { netdev_hold(dev, &po->prot_hook.dev_tracker, GFP_ATOMIC); po->prot_hook.dev = dev; WRITE_ONCE(po->ifindex, dev ? 
dev->ifindex : 0); packet_cached_dev_assign(po, dev); } dev_put(dev); } if (proto == 0 || !need_rehook) goto out_unlock; if (!unlisted && (!dev || (dev->flags & IFF_UP))) { register_prot_hook(sk); } else { sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } out_unlock: rcu_read_unlock(); spin_unlock(&po->bind_lock); release_sock(sk); return ret; } /* * Bind a packet socket to a device */ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; char name[sizeof(uaddr->sa_data_min) + 1]; /* * Check legality */ if (addr_len != sizeof(struct sockaddr)) return -EINVAL; /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. */ memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); name[sizeof(uaddr->sa_data_min)] = 0; return packet_do_bind(sk, name, 0, 0); } static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; struct sock *sk = sock->sk; /* * Check legality */ if (addr_len < sizeof(struct sockaddr_ll)) return -EINVAL; if (sll->sll_family != AF_PACKET) return -EINVAL; return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol); } static struct proto packet_proto = { .name = "PACKET", .owner = THIS_MODULE, .obj_size = sizeof(struct packet_sock), }; /* * Create a packet of type SOCK_PACKET. */ static int packet_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; struct packet_sock *po; __be16 proto = (__force __be16)protocol; /* weird, but documented */ int err; if (!ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && sock->type != SOCK_PACKET) return -ESOCKTNOSUPPORT; sock->state = SS_UNCONNECTED; err = -ENOBUFS; sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern); if (sk == NULL) goto out; sock->ops = &packet_ops; if (sock->type == SOCK_PACKET) sock->ops = &packet_ops_spkt; po = pkt_sk(sk); err = packet_alloc_pending(po); if (err) goto out_sk_free; sock_init_data(sock, sk); init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; /* * Attach a protocol block */ spin_lock_init(&po->bind_lock); mutex_init(&po->pg_vec_lock); po->rollover = NULL; po->prot_hook.func = packet_rcv; if (sock->type == SOCK_PACKET) po->prot_hook.func = packet_rcv_spkt; po->prot_hook.af_packet_priv = sk; po->prot_hook.af_packet_net = sock_net(sk); if (proto) { po->prot_hook.type = proto; __register_prot_hook(sk); } mutex_lock(&net->packet.sklist_lock); sk_add_node_tail_rcu(sk, &net->packet.sklist); mutex_unlock(&net->packet.sklist_lock); sock_prot_inuse_add(net, &packet_proto, 1); return 0; out_sk_free: sk_free(sk); out: return err; } /* * Pull a packet from our receive queue and hand it to the user. * If necessary we block. */ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; int vnet_hdr_len = READ_ONCE(pkt_sk(sk)->vnet_hdr_sz); unsigned int origlen = 0; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE)) goto out; #if 0 /* What error should we return now? EUNATTACH? 
*/ if (pkt_sk(sk)->ifindex < 0) return -ENODEV; #endif if (flags & MSG_ERRQUEUE) { err = sock_recv_errqueue(sk, msg, len, SOL_PACKET, PACKET_TX_TIMESTAMP); goto out; } /* * Call the generic datagram receiver. This handles all sorts * of horrible races and re-entrancy so we can forget about it * in the protocol layers. * * Now it will return ENETDOWN, if device have just gone down, * but then it will block. */ skb = skb_recv_datagram(sk, flags, &err); /* * An error occurred so return it. Because skb_recv_datagram() * handles the blocking we don't see and worry about blocking * retries. */ if (skb == NULL) goto out; packet_rcv_try_clear_pressure(pkt_sk(sk)); if (vnet_hdr_len) { err = packet_rcv_vnet(msg, skb, &len, vnet_hdr_len); if (err) goto out_free; } /* You lose any data beyond the buffer you gave. If it worries * a user program they can ask the device for its MTU * anyway. */ copied = skb->len; if (copied > len) { copied = len; msg->msg_flags |= MSG_TRUNC; } err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free; if (sock->type != SOCK_PACKET) { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; /* Original length was stored in sockaddr_ll fields */ origlen = PACKET_SKB_CB(skb)->sa.origlen; sll->sll_family = AF_PACKET; sll->sll_protocol = (sock->type == SOCK_DGRAM) ? vlan_get_protocol_dgram(skb) : skb->protocol; } sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { const size_t max_len = min(sizeof(skb->cb), sizeof(struct sockaddr_storage)); int copy_len; /* If the address length field is there to be filled * in, we fill it in now. */ if (sock->type == SOCK_PACKET) { __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); copy_len = msg->msg_namelen; } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); copy_len = msg->msg_namelen; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) { memset(msg->msg_name + offsetof(struct sockaddr_ll, sll_addr), 0, sizeof(sll->sll_addr)); msg->msg_namelen = sizeof(struct sockaddr_ll); } } if (WARN_ON_ONCE(copy_len > max_len)) { copy_len = max_len; msg->msg_namelen = copy_len; } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) { struct tpacket_auxdata aux; aux.tp_status = TP_STATUS_USER; if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && skb_csum_unnecessary(skb)) aux.tp_status |= TP_STATUS_CSUM_VALID; if (skb_is_gso(skb) && skb_is_gso_tcp(skb)) aux.tp_status |= TP_STATUS_GSO_TCP; aux.tp_len = origlen; aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); if (skb_vlan_tag_present(skb)) { aux.tp_vlan_tci = skb_vlan_tag_get(skb); aux.tp_vlan_tpid = ntohs(skb->vlan_proto); aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else if (unlikely(sock->type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; struct net_device *dev; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), sll->sll_ifindex); if (dev) { aux.tp_vlan_tci = vlan_get_tci(skb, dev); aux.tp_vlan_tpid = ntohs(skb->protocol); aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { aux.tp_vlan_tci = 0; aux.tp_vlan_tpid = 0; } rcu_read_unlock(); } else { aux.tp_vlan_tci = 0; aux.tp_vlan_tpid = 0; } put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } /* * Free or return the 
buffer as appropriate. Again this * hides all the races and re-entrancy issues from us. */ err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied); out_free: skb_free_datagram(sk, skb); out: return err; } static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, int peer) { struct net_device *dev; struct sock *sk = sock->sk; if (peer) return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); rcu_read_unlock(); return sizeof(*uaddr); } static int packet_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct net_device *dev; struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); int ifindex; if (peer) return -EOPNOTSUPP; ifindex = READ_ONCE(po->ifindex); sll->sll_family = AF_PACKET; sll->sll_ifindex = ifindex; sll->sll_protocol = READ_ONCE(po->num); sll->sll_pkttype = 0; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; /* Let __fortify_memcpy_chk() know the actual buffer size. */ memcpy(((struct sockaddr_storage *)sll)->__data + offsetof(struct sockaddr_ll, sll_addr) - offsetofend(struct sockaddr_ll, sll_family), dev->dev_addr, dev->addr_len); } else { sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ sll->sll_halen = 0; } rcu_read_unlock(); return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; } static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what) { switch (i->type) { case PACKET_MR_MULTICAST: if (i->alen != dev->addr_len) return -EINVAL; if (what > 0) return dev_mc_add(dev, i->addr); else return dev_mc_del(dev, i->addr); break; case PACKET_MR_PROMISC: return dev_set_promiscuity(dev, what); case PACKET_MR_ALLMULTI: return dev_set_allmulti(dev, what); case PACKET_MR_UNICAST: if (i->alen != dev->addr_len) return -EINVAL; if (what > 0) return dev_uc_add(dev, i->addr); else return dev_uc_del(dev, i->addr); break; default: break; } return 0; } static void packet_dev_mclist_delete(struct net_device *dev, struct packet_mclist **mlp, struct list_head *list) { struct packet_mclist *ml; while ((ml = *mlp) != NULL) { if (ml->ifindex == dev->ifindex) { list_add(&ml->remove_list, list); *mlp = ml->next; } else mlp = &ml->next; } } static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_sock *po = pkt_sk(sk); struct packet_mclist *ml, *i; struct net_device *dev; int err; rtnl_lock(); err = -ENODEV; dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex); if (!dev) goto done; err = -EINVAL; if (mreq->mr_alen > dev->addr_len) goto done; err = -ENOBUFS; i = kmalloc(sizeof(*i), GFP_KERNEL); if (i == NULL) goto done; err = 0; for (ml = po->mclist; ml; ml = ml->next) { if (ml->ifindex == mreq->mr_ifindex && ml->type == mreq->mr_type && ml->alen == mreq->mr_alen && memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { ml->count++; /* Free the new element ... 
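* an identical entry already exists, so only its refcount was bumped.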
*/ kfree(i); goto done; } } i->type = mreq->mr_type; i->ifindex = mreq->mr_ifindex; i->alen = mreq->mr_alen; memcpy(i->addr, mreq->mr_address, i->alen); memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen); i->count = 1; INIT_LIST_HEAD(&i->remove_list); i->next = po->mclist; po->mclist = i; err = packet_dev_mc(dev, i, 1); if (err) { po->mclist = i->next; kfree(i); } done: rtnl_unlock(); return err; } static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_mclist *ml, **mlp; rtnl_lock(); for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) { if (ml->ifindex == mreq->mr_ifindex && ml->type == mreq->mr_type && ml->alen == mreq->mr_alen && memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { if (--ml->count == 0) { struct net_device *dev; *mlp = ml->next; dev = __dev_get_by_index(sock_net(sk), ml->ifindex); if (dev) packet_dev_mc(dev, ml, -1); kfree(ml); } break; } } rtnl_unlock(); return 0; } static void packet_flush_mclist(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); struct packet_mclist *ml; if (!po->mclist) return; rtnl_lock(); while ((ml = po->mclist) != NULL) { struct net_device *dev; po->mclist = ml->next; dev = __dev_get_by_index(sock_net(sk), ml->ifindex); if (dev != NULL) packet_dev_mc(dev, ml, -1); kfree(ml); } rtnl_unlock(); } static int packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); int ret; if (level != SOL_PACKET) return -ENOPROTOOPT; switch (optname) { case PACKET_ADD_MEMBERSHIP: case PACKET_DROP_MEMBERSHIP: { struct packet_mreq_max mreq; int len = optlen; memset(&mreq, 0, sizeof(mreq)); if (len < sizeof(struct packet_mreq)) return -EINVAL; if (len > sizeof(mreq)) len = sizeof(mreq); if (copy_from_sockptr(&mreq, optval, len)) return -EFAULT; if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) return -EINVAL; if (optname == PACKET_ADD_MEMBERSHIP) ret = packet_mc_add(sk, &mreq); else ret = packet_mc_drop(sk, &mreq); return ret; } case PACKET_RX_RING: case PACKET_TX_RING: { union tpacket_req_u req_u; ret = -EINVAL; lock_sock(sk); switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: if (optlen < sizeof(req_u.req)) break; ret = copy_from_sockptr(&req_u.req, optval, sizeof(req_u.req)) ? -EINVAL : 0; break; case TPACKET_V3: default: if (optlen < sizeof(req_u.req3)) break; ret = copy_from_sockptr(&req_u.req3, optval, sizeof(req_u.req3)) ? 
-EINVAL : 0; break; } if (!ret) ret = packet_set_ring(sk, &req_u, 0, optname == PACKET_TX_RING); release_sock(sk); return ret; } case PACKET_COPY_THRESH: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; WRITE_ONCE(pkt_sk(sk)->copy_thresh, val); return 0; } case PACKET_VERSION: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; switch (val) { case TPACKET_V1: case TPACKET_V2: case TPACKET_V3: break; default: return -EINVAL; } lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { po->tp_version = val; ret = 0; } release_sock(sk); return ret; } case PACKET_RESERVE: { unsigned int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; if (val > INT_MAX) return -EINVAL; lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { po->tp_reserve = val; ret = 0; } release_sock(sk); return ret; } case PACKET_LOSS: { unsigned int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { packet_sock_flag_set(po, PACKET_SOCK_TP_LOSS, val); ret = 0; } release_sock(sk); return ret; } case PACKET_AUXDATA: { int val; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val); return 0; } case PACKET_ORIGDEV: { int val; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val); return 0; } case PACKET_VNET_HDR: case PACKET_VNET_HDR_SZ: { int val, hdr_len; if (sock->type != SOCK_RAW) return -EINVAL; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; if (optname == PACKET_VNET_HDR_SZ) { if (val && val != sizeof(struct virtio_net_hdr) && val != sizeof(struct virtio_net_hdr_mrg_rxbuf)) return -EINVAL; hdr_len = val; } else { hdr_len = val ? 
sizeof(struct virtio_net_hdr) : 0; } lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { WRITE_ONCE(po->vnet_hdr_sz, hdr_len); ret = 0; } release_sock(sk); return ret; } case PACKET_TIMESTAMP: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; WRITE_ONCE(po->tp_tstamp, val); return 0; } case PACKET_FANOUT: { struct fanout_args args = { 0 }; if (optlen != sizeof(int) && optlen != sizeof(args)) return -EINVAL; if (copy_from_sockptr(&args, optval, optlen)) return -EFAULT; return fanout_add(sk, &args); } case PACKET_FANOUT_DATA: { /* Paired with the WRITE_ONCE() in fanout_add() */ if (!READ_ONCE(po->fanout)) return -EINVAL; return fanout_set_data(po, optval, optlen); } case PACKET_IGNORE_OUTGOING: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; if (val < 0 || val > 1) return -EINVAL; WRITE_ONCE(po->prot_hook.ignore_outgoing, !!val); return 0; } case PACKET_TX_HAS_OFF: { unsigned int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; lock_sock(sk); if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec) packet_sock_flag_set(po, PACKET_SOCK_TX_HAS_OFF, val); release_sock(sk); return 0; } case PACKET_QDISC_BYPASS: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; packet_sock_flag_set(po, PACKET_SOCK_QDISC_BYPASS, val); return 0; } default: return -ENOPROTOOPT; } } static int packet_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { int len; int val, lv = sizeof(val); struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); void *data = &val; union tpacket_stats_u st; struct tpacket_rollover_stats rstats; int drops; if (level != SOL_PACKET) return -ENOPROTOOPT; if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; switch (optname) { case PACKET_STATISTICS: spin_lock_bh(&sk->sk_receive_queue.lock); memcpy(&st, &po->stats, sizeof(st)); memset(&po->stats, 0, sizeof(po->stats)); spin_unlock_bh(&sk->sk_receive_queue.lock); drops = atomic_xchg(&po->tp_drops, 0); if (po->tp_version == TPACKET_V3) { lv = sizeof(struct tpacket_stats_v3); st.stats3.tp_drops = drops; st.stats3.tp_packets += drops; data = &st.stats3; } else { lv = sizeof(struct tpacket_stats); st.stats1.tp_drops = drops; st.stats1.tp_packets += drops; data = &st.stats1; } break; case PACKET_AUXDATA: val = packet_sock_flag(po, PACKET_SOCK_AUXDATA); break; case PACKET_ORIGDEV: val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); break; case PACKET_VNET_HDR: val = !!READ_ONCE(po->vnet_hdr_sz); break; case PACKET_VNET_HDR_SZ: val = READ_ONCE(po->vnet_hdr_sz); break; case PACKET_COPY_THRESH: val = READ_ONCE(pkt_sk(sk)->copy_thresh); break; case PACKET_VERSION: val = po->tp_version; break; case PACKET_HDRLEN: if (len > sizeof(int)) len = sizeof(int); if (len < sizeof(int)) return -EINVAL; if (copy_from_user(&val, optval, len)) return -EFAULT; switch (val) { case TPACKET_V1: val = sizeof(struct tpacket_hdr); break; case TPACKET_V2: val = sizeof(struct tpacket2_hdr); break; case TPACKET_V3: val = sizeof(struct tpacket3_hdr); break; default: return -EINVAL; } break; case PACKET_RESERVE: val = po->tp_reserve; break; case PACKET_LOSS: val = packet_sock_flag(po, PACKET_SOCK_TP_LOSS); break; case PACKET_TIMESTAMP: val = READ_ONCE(po->tp_tstamp); break; case PACKET_FANOUT: val = (po->fanout ? 
((u32)po->fanout->id | ((u32)po->fanout->type << 16) | ((u32)po->fanout->flags << 24)) : 0); break; case PACKET_IGNORE_OUTGOING: val = READ_ONCE(po->prot_hook.ignore_outgoing); break; case PACKET_ROLLOVER_STATS: if (!po->rollover) return -EINVAL; rstats.tp_all = atomic_long_read(&po->rollover->num); rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); data = &rstats; lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: val = packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF); break; case PACKET_QDISC_BYPASS: val = packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS); break; default: return -ENOPROTOOPT; } if (len > lv) len = lv; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, data, len)) return -EFAULT; return 0; } static int packet_notifier(struct notifier_block *this, unsigned long msg, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct packet_mclist *ml, *tmp; LIST_HEAD(mclist); struct sock *sk; rcu_read_lock(); sk_for_each_rcu(sk, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { case NETDEV_UNREGISTER: if (po->mclist) packet_dev_mclist_delete(dev, &po->mclist, &mclist); fallthrough; case NETDEV_DOWN: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { __unregister_prot_hook(sk, false); sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); WRITE_ONCE(po->ifindex, -1); netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); } break; case NETDEV_UP: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); if (po->num) register_prot_hook(sk); spin_unlock(&po->bind_lock); } break; } } rcu_read_unlock(); /* packet_dev_mc might grab instance locks so can't run under rcu */ list_for_each_entry_safe(ml, tmp, &mclist, remove_list) { packet_dev_mc(dev, ml, -1); kfree(ml); } return NOTIFY_DONE; } static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; switch (cmd) { case SIOCOUTQ: { int amount = sk_wmem_alloc_get(sk); return put_user(amount, (int __user *)arg); } case SIOCINQ: { struct sk_buff *skb; int amount = 0; spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) amount = skb->len; spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } #ifdef CONFIG_INET case SIOCADDRT: case SIOCDELRT: case SIOCDARP: case SIOCGARP: case SIOCSARP: case SIOCGIFADDR: case SIOCSIFADDR: case SIOCGIFBRDADDR: case SIOCSIFBRDADDR: case SIOCGIFNETMASK: case SIOCSIFNETMASK: case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: return inet_dgram_ops.ioctl(sock, cmd, arg); #endif default: return -ENOIOCTLCMD; } return 0; } static __poll_t packet_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); __poll_t mask = datagram_poll(file, sock, wait); spin_lock_bh(&sk->sk_receive_queue.lock); if (po->rx_ring.pg_vec) { if (!packet_previous_rx_frame(po, &po->rx_ring, TP_STATUS_KERNEL)) mask |= EPOLLIN | EPOLLRDNORM; } packet_rcv_try_clear_pressure(po); spin_unlock_bh(&sk->sk_receive_queue.lock); spin_lock_bh(&sk->sk_write_queue.lock); if (po->tx_ring.pg_vec) { if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE)) mask |= EPOLLOUT | EPOLLWRNORM; } 
spin_unlock_bh(&sk->sk_write_queue.lock); return mask; } /* Dirty? Well, I still did not learn better way to account * for user mmaps. */ static void packet_mm_open(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct socket *sock = file->private_data; struct sock *sk = sock->sk; if (sk) atomic_long_inc(&pkt_sk(sk)->mapped); } static void packet_mm_close(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct socket *sock = file->private_data; struct sock *sk = sock->sk; if (sk) atomic_long_dec(&pkt_sk(sk)->mapped); } static const struct vm_operations_struct packet_mmap_ops = { .open = packet_mm_open, .close = packet_mm_close, }; static void free_pg_vec(struct pgv *pg_vec, unsigned int order, unsigned int len) { int i; for (i = 0; i < len; i++) { if (likely(pg_vec[i].buffer)) { if (is_vmalloc_addr(pg_vec[i].buffer)) vfree(pg_vec[i].buffer); else free_pages((unsigned long)pg_vec[i].buffer, order); pg_vec[i].buffer = NULL; } } kfree(pg_vec); } static char *alloc_one_pg_vec_page(unsigned long order) { char *buffer; gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; buffer = (char *) __get_free_pages(gfp_flags, order); if (buffer) return buffer; /* __get_free_pages failed, fall back to vmalloc */ buffer = vzalloc(array_size((1 << order), PAGE_SIZE)); if (buffer) return buffer; /* vmalloc failed, lets dig into swap here */ gfp_flags &= ~__GFP_NORETRY; buffer = (char *) __get_free_pages(gfp_flags, order); if (buffer) return buffer; /* complete and utter failure */ return NULL; } static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) { unsigned int block_nr = req->tp_block_nr; struct pgv *pg_vec; int i; pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!pg_vec)) goto out; for (i = 0; i < block_nr; i++) { pg_vec[i].buffer = alloc_one_pg_vec_page(order); if (unlikely(!pg_vec[i].buffer)) goto out_free_pgvec; } out: return pg_vec; out_free_pgvec: free_pg_vec(pg_vec, order, block_nr); pg_vec = NULL; goto out; } static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring) { struct pgv *pg_vec = NULL; struct packet_sock *po = pkt_sk(sk); unsigned long *rx_owner_map = NULL; int was_running, order = 0; struct packet_ring_buffer *rb; struct sk_buff_head *rb_queue; __be16 num; int err; /* Added to avoid minimal code churn */ struct tpacket_req *req = &req_u->req; rb = tx_ring ? &po->tx_ring : &po->rx_ring; rb_queue = tx_ring ? 
&sk->sk_write_queue : &sk->sk_receive_queue; err = -EBUSY; if (!closing) { if (atomic_long_read(&po->mapped)) goto out; if (packet_read_pending(rb)) goto out; } if (req->tp_block_nr) { unsigned int min_frame_size; /* Sanity tests and some calculations */ err = -EBUSY; if (unlikely(rb->pg_vec)) goto out; switch (po->tp_version) { case TPACKET_V1: po->tp_hdrlen = TPACKET_HDRLEN; break; case TPACKET_V2: po->tp_hdrlen = TPACKET2_HDRLEN; break; case TPACKET_V3: po->tp_hdrlen = TPACKET3_HDRLEN; break; } err = -EINVAL; if (unlikely((int)req->tp_block_size <= 0)) goto out; if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; min_frame_size = po->tp_hdrlen + po->tp_reserve; if (po->tp_version >= TPACKET_V3 && req->tp_block_size < BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size) goto out; if (unlikely(req->tp_frame_size < min_frame_size)) goto out; if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) goto out; rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr)) goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) goto out; err = -ENOMEM; order = get_order(req->tp_block_size); pg_vec = alloc_pg_vec(req, order); if (unlikely(!pg_vec)) goto out; switch (po->tp_version) { case TPACKET_V3: /* Block transmit is not supported yet */ if (!tx_ring) { init_prb_bdqc(po, rb, pg_vec, req_u); } else { struct tpacket_req3 *req3 = &req_u->req3; if (req3->tp_retire_blk_tov || req3->tp_sizeof_priv || req3->tp_feature_req_word) { err = -EINVAL; goto out_free_pg_vec; } } break; default: if (!tx_ring) { rx_owner_map = bitmap_alloc(req->tp_frame_nr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); if (!rx_owner_map) goto out_free_pg_vec; } break; } } /* Done */ else { err = -EINVAL; if (unlikely(req->tp_frame_nr)) goto out; } /* Detach socket from network */ spin_lock(&po->bind_lock); was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING); num = po->num; if (was_running) { WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, false); } spin_unlock(&po->bind_lock); synchronize_net(); err = -EBUSY; mutex_lock(&po->pg_vec_lock); if (closing || atomic_long_read(&po->mapped) == 0) { err = 0; spin_lock_bh(&rb_queue->lock); swap(rb->pg_vec, pg_vec); if (po->tp_version <= TPACKET_V2) swap(rb->rx_owner_map, rx_owner_map); rb->frame_max = (req->tp_frame_nr - 1); rb->head = 0; rb->frame_size = req->tp_frame_size; spin_unlock_bh(&rb_queue->lock); swap(rb->pg_vec_order, order); swap(rb->pg_vec_len, req->tp_block_nr); rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE; po->prot_hook.func = (po->rx_ring.pg_vec) ? 
tpacket_rcv : packet_rcv; skb_queue_purge(rb_queue); if (atomic_long_read(&po->mapped)) pr_err("packet_mmap: vma is busy: %ld\n", atomic_long_read(&po->mapped)); } mutex_unlock(&po->pg_vec_lock); spin_lock(&po->bind_lock); if (was_running) { WRITE_ONCE(po->num, num); register_prot_hook(sk); } spin_unlock(&po->bind_lock); if (pg_vec && (po->tp_version > TPACKET_V2)) { /* Because we don't support block-based V3 on tx-ring */ if (!tx_ring) prb_shutdown_retire_blk_timer(po, rb_queue); } out_free_pg_vec: if (pg_vec) { bitmap_free(rx_owner_map); free_pg_vec(pg_vec, order, req->tp_block_nr); } out: return err; } static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); unsigned long size, expected_size; struct packet_ring_buffer *rb; unsigned long start; int err = -EINVAL; int i; if (vma->vm_pgoff) return -EINVAL; mutex_lock(&po->pg_vec_lock); expected_size = 0; for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) { if (rb->pg_vec) { expected_size += rb->pg_vec_len * rb->pg_vec_pages * PAGE_SIZE; } } if (expected_size == 0) goto out; size = vma->vm_end - vma->vm_start; if (size != expected_size) goto out; start = vma->vm_start; for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) { if (rb->pg_vec == NULL) continue; for (i = 0; i < rb->pg_vec_len; i++) { struct page *page; void *kaddr = rb->pg_vec[i].buffer; int pg_num; for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) { page = pgv_to_page(kaddr); err = vm_insert_page(vma, start, page); if (unlikely(err)) goto out; start += PAGE_SIZE; kaddr += PAGE_SIZE; } } } atomic_long_inc(&po->mapped); vma->vm_ops = &packet_mmap_ops; err = 0; out: mutex_unlock(&po->pg_vec_lock); return err; } static const struct proto_ops packet_ops_spkt = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, .bind = packet_bind_spkt, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = packet_getname_spkt, .poll = datagram_poll, .ioctl = packet_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .sendmsg = packet_sendmsg_spkt, .recvmsg = packet_recvmsg, .mmap = sock_no_mmap, }; static const struct proto_ops packet_ops = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, .bind = packet_bind, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = packet_getname, .poll = packet_poll, .ioctl = packet_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, .getsockopt = packet_getsockopt, .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, }; static const struct net_proto_family packet_family_ops = { .family = PF_PACKET, .create = packet_create, .owner = THIS_MODULE, }; static struct notifier_block packet_netdev_notifier = { .notifier_call = packet_notifier, }; #ifdef CONFIG_PROC_FS static void *packet_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct net *net = seq_file_net(seq); rcu_read_lock(); return seq_hlist_start_head_rcu(&net->packet.sklist, *pos); } static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net *net = seq_file_net(seq); return seq_hlist_next_rcu(v, &net->packet.sklist, pos); } static void packet_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int packet_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) 
seq_printf(seq, "%*sRefCnt Type Proto Iface R Rmem User Inode\n", IS_ENABLED(CONFIG_64BIT) ? -17 : -9, "sk"); else { struct sock *s = sk_entry(v); const struct packet_sock *po = pkt_sk(s); seq_printf(seq, "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n", s, refcount_read(&s->sk_refcnt), s->sk_type, ntohs(READ_ONCE(po->num)), READ_ONCE(po->ifindex), packet_sock_flag(po, PACKET_SOCK_RUNNING), atomic_read(&s->sk_rmem_alloc), from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), sock_i_ino(s)); } return 0; } static const struct seq_operations packet_seq_ops = { .start = packet_seq_start, .next = packet_seq_next, .stop = packet_seq_stop, .show = packet_seq_show, }; #endif static int __net_init packet_net_init(struct net *net) { mutex_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); #ifdef CONFIG_PROC_FS if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; #endif /* CONFIG_PROC_FS */ return 0; } static void __net_exit packet_net_exit(struct net *net) { remove_proc_entry("packet", net->proc_net); WARN_ON_ONCE(!hlist_empty(&net->packet.sklist)); } static struct pernet_operations packet_net_ops = { .init = packet_net_init, .exit = packet_net_exit, }; static void __exit packet_exit(void) { sock_unregister(PF_PACKET); proto_unregister(&packet_proto); unregister_netdevice_notifier(&packet_netdev_notifier); unregister_pernet_subsys(&packet_net_ops); } static int __init packet_init(void) { int rc; rc = register_pernet_subsys(&packet_net_ops); if (rc) goto out; rc = register_netdevice_notifier(&packet_netdev_notifier); if (rc) goto out_pernet; rc = proto_register(&packet_proto, 0); if (rc) goto out_notifier; rc = sock_register(&packet_family_ops); if (rc) goto out_proto; return 0; out_proto: proto_unregister(&packet_proto); out_notifier: unregister_netdevice_notifier(&packet_netdev_notifier); out_pernet: unregister_pernet_subsys(&packet_net_ops); out: return rc; } module_init(packet_init); module_exit(packet_exit); MODULE_DESCRIPTION("Packet socket support (AF_PACKET)"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_PACKET); |
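For reference, a minimal userspace sketch (not part of af_packet.c) of how the packet_setsockopt()/packet_set_ring()/packet_mmap() paths above are normally exercised. The ring geometry is an arbitrary example, error handling is abbreviated, and CAP_NET_RAW is assumed.

#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	int version = TPACKET_V2;
	struct tpacket_req req = {
		.tp_block_size = 1 << 16,		/* page aligned, see packet_set_ring() */
		.tp_block_nr   = 4,
		.tp_frame_size = 1 << 11,		/* TPACKET_ALIGNMENT multiple */
		.tp_frame_nr   = 4 * ((1 << 16) / (1 << 11)),
	};
	size_t ring_len = (size_t)req.tp_block_size * req.tp_block_nr;
	void *ring;

	if (fd < 0)
		return 1;

	/* must happen before the ring exists, otherwise -EBUSY */
	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &version, sizeof(version));
	/* ends up in packet_set_ring() for the RX ring */
	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));

	/* maps the rx (and tx, if any) ring contiguously via packet_mmap() */
	ring = mmap(NULL, ring_len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (ring == MAP_FAILED) {
		close(fd);
		return 1;
	}

	munmap(ring, ring_len);
	close(fd);
	return 0;
}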
| 5 5 5 5 5 3 1 2 3 9 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | // SPDX-License-Identifier: GPL-2.0 /* drivers/rtc/rtc-goldfish.c * * Copyright (C) 2007 Google, Inc. * Copyright (C) 2017 Imagination Technologies Ltd. */ #include <linux/io.h> #include <linux/module.h> #include <linux/of.h> #include <linux/platform_device.h> #include <linux/rtc.h> #include <linux/goldfish.h> #include <clocksource/timer-goldfish.h> struct goldfish_rtc { void __iomem *base; int irq; struct rtc_device *rtc; }; static int goldfish_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) { u64 rtc_alarm; u64 rtc_alarm_low; u64 rtc_alarm_high; void __iomem *base; struct goldfish_rtc *rtcdrv; rtcdrv = dev_get_drvdata(dev); base = rtcdrv->base; rtc_alarm_low = gf_ioread32(base + TIMER_ALARM_LOW); rtc_alarm_high = gf_ioread32(base + TIMER_ALARM_HIGH); rtc_alarm = (rtc_alarm_high << 32) | rtc_alarm_low; do_div(rtc_alarm, NSEC_PER_SEC); memset(alrm, 0, sizeof(struct rtc_wkalrm)); rtc_time64_to_tm(rtc_alarm, &alrm->time); if (gf_ioread32(base + TIMER_ALARM_STATUS)) alrm->enabled = 1; else alrm->enabled = 0; return 0; } static int goldfish_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) { struct goldfish_rtc *rtcdrv; u64 rtc_alarm64; u64 rtc_status_reg; void __iomem *base; rtcdrv = dev_get_drvdata(dev); base = rtcdrv->base; if (alrm->enabled) { rtc_alarm64 = rtc_tm_to_time64(&alrm->time) * NSEC_PER_SEC; gf_iowrite32((rtc_alarm64 >> 32), base + TIMER_ALARM_HIGH); gf_iowrite32(rtc_alarm64, base + TIMER_ALARM_LOW); gf_iowrite32(1, base + TIMER_IRQ_ENABLED); } else { /* * if this function was called with enabled=0 * then it could mean that the application is * trying to cancel an ongoing alarm */ rtc_status_reg = gf_ioread32(base + TIMER_ALARM_STATUS); if (rtc_status_reg) gf_iowrite32(1, base + TIMER_CLEAR_ALARM); } return 0; } static int goldfish_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) { void __iomem *base; struct goldfish_rtc *rtcdrv; rtcdrv = dev_get_drvdata(dev); base = rtcdrv->base; if (enabled) gf_iowrite32(1, base + TIMER_IRQ_ENABLED); else gf_iowrite32(0, base + TIMER_IRQ_ENABLED); return 0; } static irqreturn_t goldfish_rtc_interrupt(int irq, void *dev_id) { struct goldfish_rtc *rtcdrv = dev_id; void __iomem *base = rtcdrv->base; gf_iowrite32(1, base + TIMER_CLEAR_INTERRUPT); rtc_update_irq(rtcdrv->rtc, 1, RTC_IRQF | RTC_AF); return IRQ_HANDLED; } static int goldfish_rtc_read_time(struct device *dev, struct rtc_time *tm) { struct goldfish_rtc *rtcdrv; void __iomem *base; u64 time_high; u64 time_low; u64 time; rtcdrv = dev_get_drvdata(dev); base = rtcdrv->base; time_low = gf_ioread32(base + TIMER_TIME_LOW); time_high = gf_ioread32(base + TIMER_TIME_HIGH); time = (time_high << 32) | time_low; do_div(time, NSEC_PER_SEC); rtc_time64_to_tm(time, tm); return 0; } static int goldfish_rtc_set_time(struct device 
*dev, struct rtc_time *tm) { struct goldfish_rtc *rtcdrv; void __iomem *base; u64 now64; rtcdrv = dev_get_drvdata(dev); base = rtcdrv->base; now64 = rtc_tm_to_time64(tm) * NSEC_PER_SEC; gf_iowrite32((now64 >> 32), base + TIMER_TIME_HIGH); gf_iowrite32(now64, base + TIMER_TIME_LOW); return 0; } static const struct rtc_class_ops goldfish_rtc_ops = { .read_time = goldfish_rtc_read_time, .set_time = goldfish_rtc_set_time, .read_alarm = goldfish_rtc_read_alarm, .set_alarm = goldfish_rtc_set_alarm, .alarm_irq_enable = goldfish_rtc_alarm_irq_enable }; static int goldfish_rtc_probe(struct platform_device *pdev) { struct goldfish_rtc *rtcdrv; int err; rtcdrv = devm_kzalloc(&pdev->dev, sizeof(*rtcdrv), GFP_KERNEL); if (!rtcdrv) return -ENOMEM; platform_set_drvdata(pdev, rtcdrv); rtcdrv->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(rtcdrv->base)) return PTR_ERR(rtcdrv->base); rtcdrv->irq = platform_get_irq(pdev, 0); if (rtcdrv->irq < 0) return -ENODEV; rtcdrv->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(rtcdrv->rtc)) return PTR_ERR(rtcdrv->rtc); rtcdrv->rtc->ops = &goldfish_rtc_ops; rtcdrv->rtc->range_max = U64_MAX / NSEC_PER_SEC; err = devm_request_irq(&pdev->dev, rtcdrv->irq, goldfish_rtc_interrupt, 0, pdev->name, rtcdrv); if (err) return err; return devm_rtc_register_device(rtcdrv->rtc); } static const struct of_device_id goldfish_rtc_of_match[] = { { .compatible = "google,goldfish-rtc", }, {}, }; MODULE_DEVICE_TABLE(of, goldfish_rtc_of_match); static struct platform_driver goldfish_rtc = { .probe = goldfish_rtc_probe, .driver = { .name = "goldfish_rtc", .of_match_table = goldfish_rtc_of_match, } }; module_platform_driver(goldfish_rtc); MODULE_DESCRIPTION("Android Goldfish Real Time Clock driver"); MODULE_LICENSE("GPL v2"); |
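As a usage illustration (not part of the driver), the goldfish RTC is reached through the generic RTC class character device; the /dev/rtc0 node below is an assumption, since the actual index depends on the system.

#include <fcntl.h>
#include <linux/rtc.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	struct rtc_time tm;
	int fd = open("/dev/rtc0", O_RDONLY);	/* assumed device node */

	if (fd < 0)
		return 1;

	/* serviced by goldfish_rtc_read_time() via the RTC core */
	if (ioctl(fd, RTC_RD_TIME, &tm) == 0)
		printf("%04d-%02d-%02d %02d:%02d:%02d UTC\n",
		       tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
		       tm.tm_hour, tm.tm_min, tm.tm_sec);

	close(fd);
	return 0;
}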
| 41 4 14 17 17 70 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 | // SPDX-License-Identifier: GPL-2.0 #ifndef IOU_KBUF_H #define IOU_KBUF_H #include <uapi/linux/io_uring.h> #include <linux/io_uring_types.h> enum { /* ring mapped provided buffers */ IOBL_BUF_RING = 1, /* buffers are consumed incrementally rather than always fully */ IOBL_INC = 2, }; struct io_buffer_list { /* * If ->buf_nr_pages is set, then buf_pages/buf_ring are used. If not, * then these are classic provided buffers and ->buf_list is used. */ union { struct list_head buf_list; struct io_uring_buf_ring *buf_ring; }; /* count of classic/legacy buffers in buffer list */ int nbufs; __u16 bgid; /* below is for ring provided buffers */ __u16 buf_nr_pages; __u16 nr_entries; __u16 head; __u16 mask; __u16 flags; struct io_mapped_region region; }; struct io_buffer { struct list_head list; __u64 addr; __u32 len; __u16 bid; __u16 bgid; }; enum { /* can alloc a bigger vec */ KBUF_MODE_EXPAND = 1, /* if bigger vec allocated, free old one */ KBUF_MODE_FREE = 2, }; struct buf_sel_arg { struct iovec *iovs; size_t out_len; size_t max_len; unsigned short nr_iovs; unsigned short mode; unsigned buf_group; }; void __user *io_buffer_select(struct io_kiocb *req, size_t *len, unsigned buf_group, unsigned int issue_flags); int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, unsigned int issue_flags); int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg); void io_destroy_buffers(struct io_ring_ctx *ctx); int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_manage_buffers_legacy(struct io_kiocb *req, unsigned int issue_flags); int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg); int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg); int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg); bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); void io_kbuf_drop_legacy(struct io_kiocb *req); unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs); bool io_kbuf_commit(struct io_kiocb *req, struct io_buffer_list *bl, int len, int nr); struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx, unsigned int bgid); static inline bool io_kbuf_recycle_ring(struct io_kiocb *req) { /* * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear * the flag and hence ensure that bl->head doesn't get incremented. * If the tail has already been incremented, hang on to it. * The exception is partial io, that case we should increment bl->head * to monopolize the buffer. 
*/ if (req->buf_list) { req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT); return true; } return false; } static inline bool io_do_buffer_select(struct io_kiocb *req) { if (!(req->flags & REQ_F_BUFFER_SELECT)) return false; return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)); } static inline bool io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) { if (req->flags & REQ_F_BL_NO_RECYCLE) return false; if (req->flags & REQ_F_BUFFER_SELECTED) return io_kbuf_recycle_legacy(req, issue_flags); if (req->flags & REQ_F_BUFFER_RING) return io_kbuf_recycle_ring(req); return false; } static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags) { if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED))) return 0; return __io_put_kbufs(req, len, 1); } static inline unsigned int io_put_kbufs(struct io_kiocb *req, int len, int nbufs, unsigned issue_flags) { if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED))) return 0; return __io_put_kbufs(req, len, nbufs); } #endif |
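A rough userspace counterpart (a sketch, not authoritative) of the classic provided-buffer path declared above, i.e. what io_provide_buffers_prep() and io_buffer_select() serve. It leans on liburing for brevity; the group id, buffer count and the placeholder fd are arbitrary example values.

#include <liburing.h>
#include <stdlib.h>

#define EXAMPLE_BGID	7	/* arbitrary buffer group id */
#define NR_BUFS		8
#define BUF_LEN		4096

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char *pool = malloc((size_t)NR_BUFS * BUF_LEN);

	if (!pool || io_uring_queue_init(8, &ring, 0))
		return 1;

	/* hand NR_BUFS buffers of BUF_LEN bytes to the kernel under EXAMPLE_BGID */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_provide_buffers(sqe, pool, BUF_LEN, NR_BUFS, EXAMPLE_BGID, 0);
	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);
	io_uring_cqe_seen(&ring, cqe);

	/* a later read can then ask the kernel to pick one of those buffers */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, 0 /* placeholder fd */, NULL, BUF_LEN, 0);
	sqe->flags |= IOSQE_BUFFER_SELECT;
	sqe->buf_group = EXAMPLE_BGID;
	io_uring_submit(&ring);
	/* on completion, the chosen bid is in cqe->flags >> IORING_CQE_BUFFER_SHIFT */

	io_uring_queue_exit(&ring);
	free(pool);
	return 0;
}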
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_RISCV_SPINLOCK_H #define __ASM_RISCV_SPINLOCK_H #ifdef CONFIG_QUEUED_SPINLOCKS #define _Q_PENDING_LOOPS (1 << 9) #endif #ifdef CONFIG_RISCV_COMBO_SPINLOCKS #define __no_arch_spinlock_redefine #include <asm/ticket_spinlock.h> #include <asm/qspinlock.h> #include <asm/jump_label.h> /* * TODO: Use an alternative instead of a static key when we are able to parse * the extensions string earlier in the boot process. */ DECLARE_STATIC_KEY_TRUE(qspinlock_key); #define SPINLOCK_BASE_DECLARE(op, type, type_lock) \ static __always_inline type arch_spin_##op(type_lock lock) \ { \ if (static_branch_unlikely(&qspinlock_key)) \ return queued_spin_##op(lock); \ return ticket_spin_##op(lock); \ } SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *) SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *) SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *) SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *) SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *) SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t) #elif defined(CONFIG_RISCV_QUEUED_SPINLOCKS) #include <asm/qspinlock.h> #else #include <asm/ticket_spinlock.h> #endif #include <asm/qrwlock.h> #endif /* __ASM_RISCV_SPINLOCK_H */
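For clarity, SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *) above expands to roughly the following, with the boot-time static key deciding between the queued and ticket implementations:

static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
	if (static_branch_unlikely(&qspinlock_key))
		return queued_spin_lock(lock);
	return ticket_spin_lock(lock);
}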
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Header file for dma buffer sharing framework. * * Copyright(C) 2011 Linaro Limited. All rights reserved. * Author: Sumit Semwal <sumit.semwal@ti.com> * * Many thanks to linaro-mm-sig list, and specially * Arnd Bergmann <arnd@arndb.de>, Rob Clark <rob@ti.com> and * Daniel Vetter <daniel@ffwll.ch> for their support in creation and * refining of this idea. */ #ifndef __DMA_BUF_H__ #define __DMA_BUF_H__ #include <linux/iosys-map.h> #include <linux/file.h> #include <linux/err.h> #include <linux/scatterlist.h> #include <linux/list.h> #include <linux/dma-mapping.h> #include <linux/fs.h> #include <linux/dma-fence.h> #include <linux/wait.h> struct device; struct dma_buf; struct dma_buf_attachment; /** * struct dma_buf_ops - operations possible on struct dma_buf * @vmap: [optional] creates a virtual mapping for the buffer into kernel * address space. Same restrictions as for vmap and friends apply. * @vunmap: [optional] unmaps a vmap from the buffer */ struct dma_buf_ops { /** * @attach: * * This is called from dma_buf_attach() to make sure that a given * &dma_buf_attachment.dev can access the provided &dma_buf.
Exporters * which support buffer objects in special locations like VRAM or * device-specific carveout areas should check whether the buffer could * be move to system memory (or directly accessed by the provided * device), and otherwise need to fail the attach operation. * * The exporter should also in general check whether the current * allocation fulfills the DMA constraints of the new device. If this * is not the case, and the allocation cannot be moved, it should also * fail the attach operation. * * Any exporter-private housekeeping data can be stored in the * &dma_buf_attachment.priv pointer. * * This callback is optional. * * Returns: * * 0 on success, negative error code on failure. It might return -EBUSY * to signal that backing storage is already allocated and incompatible * with the requirements of requesting device. */ int (*attach)(struct dma_buf *, struct dma_buf_attachment *); /** * @detach: * * This is called by dma_buf_detach() to release a &dma_buf_attachment. * Provided so that exporters can clean up any housekeeping for an * &dma_buf_attachment. * * This callback is optional. */ void (*detach)(struct dma_buf *, struct dma_buf_attachment *); /** * @pin: * * This is called by dma_buf_pin() and lets the exporter know that the * DMA-buf can't be moved any more. Ideally, the exporter should * pin the buffer so that it is generally accessible by all * devices. * * This is called with the &dmabuf.resv object locked and is mutual * exclusive with @cache_sgt_mapping. * * This is called automatically for non-dynamic importers from * dma_buf_attach(). * * Note that similar to non-dynamic exporters in their @map_dma_buf * callback the driver must guarantee that the memory is available for * use and cleared of any old data by the time this function returns. * Drivers which pipeline their buffer moves internally must wait for * all moves and clears to complete. * * Returns: * * 0 on success, negative error code on failure. */ int (*pin)(struct dma_buf_attachment *attach); /** * @unpin: * * This is called by dma_buf_unpin() and lets the exporter know that the * DMA-buf can be moved again. * * This is called with the dmabuf->resv object locked and is mutual * exclusive with @cache_sgt_mapping. * * This callback is optional. */ void (*unpin)(struct dma_buf_attachment *attach); /** * @map_dma_buf: * * This is called by dma_buf_map_attachment() and is used to map a * shared &dma_buf into device address space, and it is mandatory. It * can only be called if @attach has been called successfully. * * This call may sleep, e.g. when the backing storage first needs to be * allocated, or moved to a location suitable for all currently attached * devices. * * Note that any specific buffer attributes required for this function * should get added to device_dma_parameters accessible via * &device.dma_params from the &dma_buf_attachment. The @attach callback * should also check these constraints. * * If this is being called for the first time, the exporter can now * choose to scan through the list of attachments for this buffer, * collate the requirements of the attached devices, and choose an * appropriate backing storage for the buffer. * * Based on enum dma_data_direction, it might be possible to have * multiple users accessing at the same time (for reading, maybe), or * any other kind of sharing that the exporter might wish to make * available to buffer-users. * * This is always called with the dmabuf->resv object locked when * the dynamic_mapping flag is true. 
* * Note that for non-dynamic exporters the driver must guarantee that * that the memory is available for use and cleared of any old data by * the time this function returns. Drivers which pipeline their buffer * moves internally must wait for all moves and clears to complete. * Dynamic exporters do not need to follow this rule: For non-dynamic * importers the buffer is already pinned through @pin, which has the * same requirements. Dynamic importers otoh are required to obey the * dma_resv fences. * * Returns: * * A &sg_table scatter list of the backing storage of the DMA buffer, * already mapped into the device address space of the &device attached * with the provided &dma_buf_attachment. The addresses and lengths in * the scatter list are PAGE_SIZE aligned. * * On failure, returns a negative error value wrapped into a pointer. * May also return -EINTR when a signal was received while being * blocked. * * Note that exporters should not try to cache the scatter list, or * return the same one for multiple calls. Caching is done either by the * DMA-BUF code (for non-dynamic importers) or the importer. Ownership * of the scatter list is transferred to the caller, and returned by * @unmap_dma_buf. */ struct sg_table * (*map_dma_buf)(struct dma_buf_attachment *, enum dma_data_direction); /** * @unmap_dma_buf: * * This is called by dma_buf_unmap_attachment() and should unmap and * release the &sg_table allocated in @map_dma_buf, and it is mandatory. * For static dma_buf handling this might also unpin the backing * storage if this is the last mapping of the DMA buffer. */ void (*unmap_dma_buf)(struct dma_buf_attachment *, struct sg_table *, enum dma_data_direction); /* TODO: Add try_map_dma_buf version, to return immed with -EBUSY * if the call would block. */ /** * @release: * * Called after the last dma_buf_put to release the &dma_buf, and * mandatory. */ void (*release)(struct dma_buf *); /** * @begin_cpu_access: * * This is called from dma_buf_begin_cpu_access() and allows the * exporter to ensure that the memory is actually coherent for cpu * access. The exporter also needs to ensure that cpu access is coherent * for the access direction. The direction can be used by the exporter * to optimize the cache flushing, i.e. access with a different * direction (read instead of write) might return stale or even bogus * data (e.g. when the exporter needs to copy the data to temporary * storage). * * Note that this is both called through the DMA_BUF_IOCTL_SYNC IOCTL * command for userspace mappings established through @mmap, and also * for kernel mappings established with @vmap. * * This callback is optional. * * Returns: * * 0 on success or a negative error code on failure. This can for * example fail when the backing storage can't be allocated. Can also * return -ERESTARTSYS or -EINTR when the call has been interrupted and * needs to be restarted. */ int (*begin_cpu_access)(struct dma_buf *, enum dma_data_direction); /** * @end_cpu_access: * * This is called from dma_buf_end_cpu_access() when the importer is * done accessing the CPU. The exporter can use this to flush caches and * undo anything else done in @begin_cpu_access. * * This callback is optional. * * Returns: * * 0 on success or a negative error code on failure. Can return * -ERESTARTSYS or -EINTR when the call has been interrupted and needs * to be restarted. 
*/ int (*end_cpu_access)(struct dma_buf *, enum dma_data_direction); /** * @mmap: * * This callback is used by the dma_buf_mmap() function * * Note that the mapping needs to be incoherent, userspace is expected * to bracket CPU access using the DMA_BUF_IOCTL_SYNC interface. * * Because dma-buf buffers have invariant size over their lifetime, the * dma-buf core checks whether a vma is too large and rejects such * mappings. The exporter hence does not need to duplicate this check. * Drivers do not need to check this themselves. * * If an exporter needs to manually flush caches and hence needs to fake * coherency for mmap support, it needs to be able to zap all the ptes * pointing at the backing storage. Now linux mm needs a struct * address_space associated with the struct file stored in vma->vm_file * to do that with the function unmap_mapping_range. But the dma_buf * framework only backs every dma_buf fd with the anon_file struct file, * i.e. all dma_bufs share the same file. * * Hence exporters need to setup their own file (and address_space) * association by setting vma->vm_file and adjusting vma->vm_pgoff in * the dma_buf mmap callback. In the specific case of a gem driver the * exporter could use the shmem file already provided by gem (and set * vm_pgoff = 0). Exporters can then zap ptes by unmapping the * corresponding range of the struct address_space associated with their * own file. * * This callback is optional. * * Returns: * * 0 on success or a negative error code on failure. */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma); int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); }; /** * struct dma_buf - shared buffer object * * This represents a shared buffer, created by calling dma_buf_export(). The * userspace representation is a normal file descriptor, which can be created by * calling dma_buf_fd(). * * Shared dma buffers are reference counted using dma_buf_put() and * get_dma_buf(). * * Device DMA access is handled by the separate &struct dma_buf_attachment. */ struct dma_buf { /** * @size: * * Size of the buffer; invariant over the lifetime of the buffer. */ size_t size; /** * @file: * * File pointer used for sharing buffers across, and for refcounting. * See dma_buf_get() and dma_buf_put(). */ struct file *file; /** * @attachments: * * List of dma_buf_attachment that denotes all devices attached, * protected by &dma_resv lock @resv. */ struct list_head attachments; /** @ops: dma_buf_ops associated with this buffer object. */ const struct dma_buf_ops *ops; /** * @vmapping_counter: * * Used internally to refcnt the vmaps returned by dma_buf_vmap(). * Protected by @lock. */ unsigned vmapping_counter; /** * @vmap_ptr: * The current vmap ptr if @vmapping_counter > 0. Protected by @lock. */ struct iosys_map vmap_ptr; /** * @exp_name: * * Name of the exporter; useful for debugging. Must not be NULL */ const char *exp_name; /** * @name: * * Userspace-provided name. Default value is NULL. If not NULL, * length cannot be longer than DMA_BUF_NAME_LEN, including NIL * char. Useful for accounting and debugging. Read/Write accesses * are protected by @name_lock * * See the IOCTLs DMA_BUF_SET_NAME or DMA_BUF_SET_NAME_A/B */ const char *name; /** @name_lock: Spinlock to protect name access for read access. */ spinlock_t name_lock; /** * @owner: * * Pointer to exporter module; used for refcounting when exporter is a * kernel module. 
*/ struct module *owner; /** @list_node: node for dma_buf accounting and debugging. */ struct list_head list_node; /** @priv: exporter specific private data for this buffer object. */ void *priv; /** * @resv: * * Reservation object linked to this dma-buf. * * IMPLICIT SYNCHRONIZATION RULES: * * Drivers which support implicit synchronization of buffer access as * e.g. exposed in `Implicit Fence Poll Support`_ must follow the * below rules. * * - Drivers must add a read fence through dma_resv_add_fence() with the * DMA_RESV_USAGE_READ flag for anything the userspace API considers a * read access. This highly depends upon the API and window system. * * - Similarly drivers must add a write fence through * dma_resv_add_fence() with the DMA_RESV_USAGE_WRITE flag for * anything the userspace API considers write access. * * - Drivers may just always add a write fence, since that only * causes unnecessary synchronization, but no correctness issues. * * - Some drivers only expose a synchronous userspace API with no * pipelining across drivers. These do not set any fences for their * access. An example here is v4l. * * - Driver should use dma_resv_usage_rw() when retrieving fences as * dependency for implicit synchronization. * * DYNAMIC IMPORTER RULES: * * Dynamic importers, see dma_buf_attachment_is_dynamic(), have * additional constraints on how they set up fences: * * - Dynamic importers must obey the write fences and wait for them to * signal before allowing access to the buffer's underlying storage * through the device. * * - Dynamic importers should set fences for any access that they can't * disable immediately from their &dma_buf_attach_ops.move_notify * callback. * * IMPORTANT: * * All drivers and memory management related functions must obey the * struct dma_resv rules, specifically the rules for updating and * obeying fences. See enum dma_resv_usage for further descriptions. */ struct dma_resv *resv; /** @poll: for userspace poll support */ wait_queue_head_t poll; /** @cb_in: for userspace poll support */ /** @cb_out: for userspace poll support */ struct dma_buf_poll_cb_t { struct dma_fence_cb cb; wait_queue_head_t *poll; __poll_t active; } cb_in, cb_out; #ifdef CONFIG_DMABUF_SYSFS_STATS /** * @sysfs_entry: * * For exposing information about this buffer in sysfs. See also * `DMA-BUF statistics`_ for the uapi this enables. */ struct dma_buf_sysfs_entry { struct kobject kobj; struct dma_buf *dmabuf; } *sysfs_entry; #endif }; /** * struct dma_buf_attach_ops - importer operations for an attachment * * Attachment operations implemented by the importer. */ struct dma_buf_attach_ops { /** * @allow_peer2peer: * * If this is set to true the importer must be able to handle peer * resources without struct pages. */ bool allow_peer2peer; /** * @move_notify: [optional] notification that the DMA-buf is moving * * If this callback is provided the framework can avoid pinning the * backing store while mappings exists. * * This callback is called with the lock of the reservation object * associated with the dma_buf held and the mapping function must be * called with this lock held as well. This makes sure that no mapping * is created concurrently with an ongoing move operation. * * Mappings stay valid and are not directly affected by this callback. * But the DMA-buf can now be in a different physical location, so all * mappings should be destroyed and re-created as soon as possible. * * New mappings can be created after this callback returns, and will * point to the new location of the DMA-buf. 
*/ void (*move_notify)(struct dma_buf_attachment *attach); }; /** * struct dma_buf_attachment - holds device-buffer attachment data * @dmabuf: buffer for this attachment. * @dev: device attached to the buffer. * @node: list of dma_buf_attachment, protected by dma_resv lock of the dmabuf. * @peer2peer: true if the importer can handle peer resources without pages. * @priv: exporter specific attachment data. * @importer_ops: importer operations for this attachment, if provided * dma_buf_map/unmap_attachment() must be called with the dma_resv lock held. * @importer_priv: importer specific attachment data. * * This structure holds the attachment information between the dma_buf buffer * and its user device(s). The list contains one attachment struct per device * attached to the buffer. * * An attachment is created by calling dma_buf_attach(), and released again by * calling dma_buf_detach(). The DMA mapping itself needed to initiate a * transfer is created by dma_buf_map_attachment() and freed again by calling * dma_buf_unmap_attachment(). */ struct dma_buf_attachment { struct dma_buf *dmabuf; struct device *dev; struct list_head node; bool peer2peer; const struct dma_buf_attach_ops *importer_ops; void *importer_priv; void *priv; }; /** * struct dma_buf_export_info - holds information needed to export a dma_buf * @exp_name: name of the exporter - useful for debugging. * @owner: pointer to exporter module - used for refcounting kernel module * @ops: Attach allocator-defined dma buf ops to the new buffer * @size: Size of the buffer - invariant over the lifetime of the buffer * @flags: mode flags for the file * @resv: reservation-object, NULL to allocate default one * @priv: Attach private data of allocator to this buffer * * This structure holds the information required to export the buffer. Used * with dma_buf_export() only. */ struct dma_buf_export_info { const char *exp_name; struct module *owner; const struct dma_buf_ops *ops; size_t size; int flags; struct dma_resv *resv; void *priv; }; /** * DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters * @name: export-info name * * DEFINE_DMA_BUF_EXPORT_INFO macro defines the &struct dma_buf_export_info, * zeroes it out and pre-populates exp_name in it. */ #define DEFINE_DMA_BUF_EXPORT_INFO(name) \ struct dma_buf_export_info name = { .exp_name = KBUILD_MODNAME, \ .owner = THIS_MODULE } /** * get_dma_buf - convenience wrapper for get_file. * @dmabuf: [in] pointer to dma_buf * * Increments the reference count on the dma-buf, needed in case of drivers * that either need to create additional references to the dmabuf on the * kernel side. For example, an exporter that needs to keep a dmabuf ptr * so that subsequent exports don't create a new dmabuf. */ static inline void get_dma_buf(struct dma_buf *dmabuf) { get_file(dmabuf->file); } /** * dma_buf_is_dynamic - check if a DMA-buf uses dynamic mappings. * @dmabuf: the DMA-buf to check * * Returns true if a DMA-buf exporter wants to be called with the dma_resv * locked for the map/unmap callbacks, false if it doesn't wants to be called * with the lock held. 
*/ static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf) { return !!dmabuf->ops->pin; } struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, struct device *dev); struct dma_buf_attachment * dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev, const struct dma_buf_attach_ops *importer_ops, void *importer_priv); void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach); int dma_buf_pin(struct dma_buf_attachment *attach); void dma_buf_unpin(struct dma_buf_attachment *attach); struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info); int dma_buf_fd(struct dma_buf *dmabuf, int flags); struct dma_buf *dma_buf_get(int fd); void dma_buf_put(struct dma_buf *dmabuf); struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *, enum dma_data_direction); void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *, enum dma_data_direction); void dma_buf_move_notify(struct dma_buf *dma_buf); int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); int dma_buf_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); struct sg_table * dma_buf_map_attachment_unlocked(struct dma_buf_attachment *attach, enum dma_data_direction direction); void dma_buf_unmap_attachment_unlocked(struct dma_buf_attachment *attach, struct sg_table *sg_table, enum dma_data_direction direction); int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map); void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map); int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); struct dma_buf *dma_buf_iter_begin(void); struct dma_buf *dma_buf_iter_next(struct dma_buf *dmbuf); #endif /* __DMA_BUF_H__ */ |
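To tie the exporter and importer halves of this header together, a condensed kernel-side sketch follows; my_dmabuf_ops, my_export_buffer() and my_import_buffer() are invented names, and a real exporter must still fill in the mandatory map_dma_buf/unmap_dma_buf/release callbacks.

#include <linux/dma-buf.h>

static const struct dma_buf_ops my_dmabuf_ops; /* placeholder: fill in mandatory ops */

static int my_export_buffer(void *driver_priv, size_t size)
{
	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
	struct dma_buf *dmabuf;

	exp_info.ops   = &my_dmabuf_ops;
	exp_info.size  = size;
	exp_info.flags = O_CLOEXEC;
	exp_info.priv  = driver_priv;

	dmabuf = dma_buf_export(&exp_info);
	if (IS_ERR(dmabuf))
		return PTR_ERR(dmabuf);

	/* hand an fd to userspace so another driver can import the buffer */
	return dma_buf_fd(dmabuf, O_CLOEXEC);
}

static int my_import_buffer(struct dma_buf *dmabuf, struct device *dev)
{
	struct dma_buf_attachment *attach;
	struct sg_table *sgt;

	attach = dma_buf_attach(dmabuf, dev);
	if (IS_ERR(attach))
		return PTR_ERR(attach);

	sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
	if (IS_ERR(sgt)) {
		dma_buf_detach(dmabuf, attach);
		return PTR_ERR(sgt);
	}

	/* ... program the importing device with the sgt ... */

	dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
	dma_buf_detach(dmabuf, attach);
	return 0;
}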
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ #include "devl_internal.h" static inline bool devlink_rate_is_leaf(struct devlink_rate *devlink_rate) { return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF; } static inline bool devlink_rate_is_node(struct devlink_rate *devlink_rate) { return devlink_rate->type == DEVLINK_RATE_TYPE_NODE; } static struct devlink_rate * devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info) { struct devlink_rate *devlink_rate; struct devlink_port *devlink_port; devlink_port = devlink_port_get_from_attrs(devlink, info->attrs); if (IS_ERR(devlink_port)) return ERR_CAST(devlink_port); devlink_rate = devlink_port->devlink_rate; return devlink_rate ?: ERR_PTR(-ENODEV); } static struct devlink_rate * devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name) { static struct devlink_rate *devlink_rate; list_for_each_entry(devlink_rate, &devlink->rate_list, list) { if (devlink_rate_is_node(devlink_rate) && !strcmp(node_name, devlink_rate->name)) return devlink_rate; } return ERR_PTR(-ENODEV); } static struct devlink_rate * devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs) { const char *rate_node_name; size_t len; if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME]) return ERR_PTR(-EINVAL); rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]); len = strlen(rate_node_name); /* Name cannot be empty or decimal number */ if (!len || strspn(rate_node_name, "0123456789") == len) return ERR_PTR(-EINVAL); return devlink_rate_node_get_by_name(devlink, rate_node_name); } static struct devlink_rate * devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info) { return devlink_rate_node_get_from_attrs(devlink, info->attrs); } static struct devlink_rate * devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info) { struct nlattr **attrs = info->attrs; if (attrs[DEVLINK_ATTR_PORT_INDEX]) return devlink_rate_leaf_get_from_info(devlink, info); else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME]) return devlink_rate_node_get_from_info(devlink, info); else return ERR_PTR(-EINVAL); } static int devlink_nl_rate_fill(struct sk_buff *msg, struct devlink_rate *devlink_rate, enum devlink_command cmd, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { struct devlink *devlink = devlink_rate->devlink; void *hdr; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type)) goto nla_put_failure; if (devlink_rate_is_leaf(devlink_rate)) { if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_rate->devlink_port->index)) goto nla_put_failure; } else if (devlink_rate_is_node(devlink_rate)) { if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME, devlink_rate->name)) goto nla_put_failure; } if (devlink_nl_put_u64(msg, DEVLINK_ATTR_RATE_TX_SHARE, devlink_rate->tx_share)) goto nla_put_failure; if (devlink_nl_put_u64(msg, DEVLINK_ATTR_RATE_TX_MAX, devlink_rate->tx_max)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY, devlink_rate->tx_priority)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_WEIGHT, devlink_rate->tx_weight)) goto nla_put_failure; if (devlink_rate->parent) if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, devlink_rate->parent->name)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static 
void devlink_rate_notify(struct devlink_rate *devlink_rate, enum devlink_command cmd) { struct devlink *devlink = devlink_rate->devlink; struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL); if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; err = devlink_nl_rate_fill(msg, devlink_rate, cmd, 0, 0, 0, NULL); if (err) { nlmsg_free(msg); return; } devlink_nl_notify_send(devlink, msg); } void devlink_rates_notify_register(struct devlink *devlink) { struct devlink_rate *rate_node; list_for_each_entry(rate_node, &devlink->rate_list, list) devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW); } void devlink_rates_notify_unregister(struct devlink *devlink) { struct devlink_rate *rate_node; list_for_each_entry_reverse(rate_node, &devlink->rate_list, list) devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL); } static int devlink_nl_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_rate *devlink_rate; int idx = 0; int err = 0; list_for_each_entry(devlink_rate, &devlink->rate_list, list) { enum devlink_command cmd = DEVLINK_CMD_RATE_NEW; u32 id = NETLINK_CB(cb->skb).portid; if (idx < state->idx) { idx++; continue; } err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id, cb->nlh->nlmsg_seq, flags, NULL); if (err) { state->idx = idx; break; } idx++; } return err; } int devlink_nl_rate_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return devlink_nl_dumpit(skb, cb, devlink_nl_rate_get_dump_one); } int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_rate *devlink_rate; struct sk_buff *msg; int err; devlink_rate = devlink_rate_get_from_info(devlink, info); if (IS_ERR(devlink_rate)) return PTR_ERR(devlink_rate); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW, info->snd_portid, info->snd_seq, 0, info->extack); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static bool devlink_rate_is_parent_node(struct devlink_rate *devlink_rate, struct devlink_rate *parent) { while (parent) { if (parent == devlink_rate) return true; parent = parent->parent; } return false; } static int devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate, struct genl_info *info, struct nlattr *nla_parent) { struct devlink *devlink = devlink_rate->devlink; const char *parent_name = nla_data(nla_parent); const struct devlink_ops *ops = devlink->ops; size_t len = strlen(parent_name); struct devlink_rate *parent; int err = -EOPNOTSUPP; parent = devlink_rate->parent; if (parent && !len) { if (devlink_rate_is_leaf(devlink_rate)) err = ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv, NULL, info->extack); else if (devlink_rate_is_node(devlink_rate)) err = ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv, NULL, info->extack); if (err) return err; refcount_dec(&parent->refcnt); devlink_rate->parent = NULL; } else if (len) { parent = devlink_rate_node_get_by_name(devlink, parent_name); if (IS_ERR(parent)) return -ENODEV; if (parent == devlink_rate) { NL_SET_ERR_MSG(info->extack, "Parent to self is not allowed"); return -EINVAL; } if (devlink_rate_is_node(devlink_rate) && devlink_rate_is_parent_node(devlink_rate, 
parent->parent)) { NL_SET_ERR_MSG(info->extack, "Node is already a parent of parent node."); return -EEXIST; } if (devlink_rate_is_leaf(devlink_rate)) err = ops->rate_leaf_parent_set(devlink_rate, parent, devlink_rate->priv, parent->priv, info->extack); else if (devlink_rate_is_node(devlink_rate)) err = ops->rate_node_parent_set(devlink_rate, parent, devlink_rate->priv, parent->priv, info->extack); if (err) return err; if (devlink_rate->parent) /* we're reassigning to other parent in this case */ refcount_dec(&devlink_rate->parent->refcnt); refcount_inc(&parent->refcnt); devlink_rate->parent = parent; } return 0; } static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, const struct devlink_ops *ops, struct genl_info *info) { struct nlattr *nla_parent, **attrs = info->attrs; int err = -EOPNOTSUPP; u32 priority; u32 weight; u64 rate; if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) { rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]); if (devlink_rate_is_leaf(devlink_rate)) err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv, rate, info->extack); else if (devlink_rate_is_node(devlink_rate)) err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv, rate, info->extack); if (err) return err; devlink_rate->tx_share = rate; } if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) { rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]); if (devlink_rate_is_leaf(devlink_rate)) err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv, rate, info->extack); else if (devlink_rate_is_node(devlink_rate)) err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv, rate, info->extack); if (err) return err; devlink_rate->tx_max = rate; } if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]) { priority = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]); if (devlink_rate_is_leaf(devlink_rate)) err = ops->rate_leaf_tx_priority_set(devlink_rate, devlink_rate->priv, priority, info->extack); else if (devlink_rate_is_node(devlink_rate)) err = ops->rate_node_tx_priority_set(devlink_rate, devlink_rate->priv, priority, info->extack); if (err) return err; devlink_rate->tx_priority = priority; } if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]) { weight = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]); if (devlink_rate_is_leaf(devlink_rate)) err = ops->rate_leaf_tx_weight_set(devlink_rate, devlink_rate->priv, weight, info->extack); else if (devlink_rate_is_node(devlink_rate)) err = ops->rate_node_tx_weight_set(devlink_rate, devlink_rate->priv, weight, info->extack); if (err) return err; devlink_rate->tx_weight = weight; } nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME]; if (nla_parent) { err = devlink_nl_rate_parent_node_set(devlink_rate, info, nla_parent); if (err) return err; } return 0; } static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, struct genl_info *info, enum devlink_rate_type type) { struct nlattr **attrs = info->attrs; if (type == DEVLINK_RATE_TYPE_LEAF) { if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) { NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the leafs"); return false; } if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) { NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the leafs"); return false; } if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] && !ops->rate_leaf_parent_set) { NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the leafs"); return false; } if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_leaf_tx_priority_set) { NL_SET_ERR_MSG_ATTR(info->extack, 
attrs[DEVLINK_ATTR_RATE_TX_PRIORITY], "TX priority set isn't supported for the leafs"); return false; } if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_leaf_tx_weight_set) { NL_SET_ERR_MSG_ATTR(info->extack, attrs[DEVLINK_ATTR_RATE_TX_WEIGHT], "TX weight set isn't supported for the leafs"); return false; } } else if (type == DEVLINK_RATE_TYPE_NODE) { if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) { NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes"); return false; } if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) { NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the nodes"); return false; } if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] && !ops->rate_node_parent_set) { NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the nodes"); return false; } if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_node_tx_priority_set) { NL_SET_ERR_MSG_ATTR(info->extack, attrs[DEVLINK_ATTR_RATE_TX_PRIORITY], "TX priority set isn't supported for the nodes"); return false; } if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_node_tx_weight_set) { NL_SET_ERR_MSG_ATTR(info->extack, attrs[DEVLINK_ATTR_RATE_TX_WEIGHT], "TX weight set isn't supported for the nodes"); return false; } } else { WARN(1, "Unknown type of rate object"); return false; } return true; } int devlink_nl_rate_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_rate *devlink_rate; const struct devlink_ops *ops; int err; devlink_rate = devlink_rate_get_from_info(devlink, info); if (IS_ERR(devlink_rate)) return PTR_ERR(devlink_rate); ops = devlink->ops; if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type)) return -EOPNOTSUPP; err = devlink_nl_rate_set(devlink_rate, ops, info); if (!err) devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW); return err; } int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_rate *rate_node; const struct devlink_ops *ops; int err; ops = devlink->ops; if (!ops || !ops->rate_node_new || !ops->rate_node_del) { NL_SET_ERR_MSG(info->extack, "Rate nodes aren't supported"); return -EOPNOTSUPP; } if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE)) return -EOPNOTSUPP; rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs); if (!IS_ERR(rate_node)) return -EEXIST; else if (rate_node == ERR_PTR(-EINVAL)) return -EINVAL; rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL); if (!rate_node) return -ENOMEM; rate_node->devlink = devlink; rate_node->type = DEVLINK_RATE_TYPE_NODE; rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL); if (!rate_node->name) { err = -ENOMEM; goto err_strdup; } err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack); if (err) goto err_node_new; err = devlink_nl_rate_set(rate_node, ops, info); if (err) goto err_rate_set; refcount_set(&rate_node->refcnt, 1); list_add(&rate_node->list, &devlink->rate_list); devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW); return 0; err_rate_set: ops->rate_node_del(rate_node, rate_node->priv, info->extack); err_node_new: kfree(rate_node->name); err_strdup: kfree(rate_node); return err; } int devlink_nl_rate_del_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_rate *rate_node; int err; rate_node = devlink_rate_node_get_from_info(devlink, info); if (IS_ERR(rate_node)) 
return PTR_ERR(rate_node); if (refcount_read(&rate_node->refcnt) > 1) { NL_SET_ERR_MSG(info->extack, "Node has children. Cannot delete node."); return -EBUSY; } devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL); err = devlink->ops->rate_node_del(rate_node, rate_node->priv, info->extack); if (rate_node->parent) refcount_dec(&rate_node->parent->refcnt); list_del(&rate_node->list); kfree(rate_node->name); kfree(rate_node); return err; } int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { struct devlink_rate *devlink_rate; list_for_each_entry(devlink_rate, &devlink->rate_list, list) if (devlink_rate_is_node(devlink_rate)) { NL_SET_ERR_MSG(extack, "Rate node(s) exists."); return -EBUSY; } return 0; } /** * devl_rate_node_create - create devlink rate node * @devlink: devlink instance * @priv: driver private data * @node_name: name of the resulting node * @parent: parent devlink_rate struct * * Create devlink rate object of type node */ struct devlink_rate * devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name, struct devlink_rate *parent) { struct devlink_rate *rate_node; rate_node = devlink_rate_node_get_by_name(devlink, node_name); if (!IS_ERR(rate_node)) return ERR_PTR(-EEXIST); rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL); if (!rate_node) return ERR_PTR(-ENOMEM); if (parent) { rate_node->parent = parent; refcount_inc(&rate_node->parent->refcnt); } rate_node->type = DEVLINK_RATE_TYPE_NODE; rate_node->devlink = devlink; rate_node->priv = priv; rate_node->name = kstrdup(node_name, GFP_KERNEL); if (!rate_node->name) { kfree(rate_node); return ERR_PTR(-ENOMEM); } refcount_set(&rate_node->refcnt, 1); list_add(&rate_node->list, &devlink->rate_list); devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW); return rate_node; } EXPORT_SYMBOL_GPL(devl_rate_node_create); /** * devl_rate_leaf_create - create devlink rate leaf * @devlink_port: devlink port object to create rate object on * @priv: driver private data * @parent: parent devlink_rate struct * * Create devlink rate object of type leaf on provided @devlink_port. */ int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv, struct devlink_rate *parent) { struct devlink *devlink = devlink_port->devlink; struct devlink_rate *devlink_rate; devl_assert_locked(devlink_port->devlink); if (WARN_ON(devlink_port->devlink_rate)) return -EBUSY; devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL); if (!devlink_rate) return -ENOMEM; if (parent) { devlink_rate->parent = parent; refcount_inc(&devlink_rate->parent->refcnt); } devlink_rate->type = DEVLINK_RATE_TYPE_LEAF; devlink_rate->devlink = devlink; devlink_rate->devlink_port = devlink_port; devlink_rate->priv = priv; list_add_tail(&devlink_rate->list, &devlink->rate_list); devlink_port->devlink_rate = devlink_rate; devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW); return 0; } EXPORT_SYMBOL_GPL(devl_rate_leaf_create); /** * devl_rate_leaf_destroy - destroy devlink rate leaf * * @devlink_port: devlink port linked to the rate object * * Destroy the devlink rate object of type leaf on provided @devlink_port. 
*/ void devl_rate_leaf_destroy(struct devlink_port *devlink_port) { struct devlink_rate *devlink_rate = devlink_port->devlink_rate; devl_assert_locked(devlink_port->devlink); if (!devlink_rate) return; devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL); if (devlink_rate->parent) refcount_dec(&devlink_rate->parent->refcnt); list_del(&devlink_rate->list); devlink_port->devlink_rate = NULL; kfree(devlink_rate); } EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy); /** * devl_rate_nodes_destroy - destroy all devlink rate nodes on device * @devlink: devlink instance * * Unset parent for all rate objects and destroy all rate nodes * on specified device. */ void devl_rate_nodes_destroy(struct devlink *devlink) { struct devlink_rate *devlink_rate, *tmp; const struct devlink_ops *ops = devlink->ops; devl_assert_locked(devlink); list_for_each_entry(devlink_rate, &devlink->rate_list, list) { if (!devlink_rate->parent) continue; refcount_dec(&devlink_rate->parent->refcnt); if (devlink_rate_is_leaf(devlink_rate)) ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv, NULL, NULL); else if (devlink_rate_is_node(devlink_rate)) ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv, NULL, NULL); } list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) { if (devlink_rate_is_node(devlink_rate)) { ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL); list_del(&devlink_rate->list); kfree(devlink_rate->name); kfree(devlink_rate); } } } EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy); |
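The exported devl_rate_* helpers above form the driver-facing half of this file. As a rough illustration (a sketch under stated assumptions, not canonical usage), a driver could wire them up as below; the foo_* names, the priv pointers and the abbreviated error handling are illustrative, and devl_lock()/devl_unlock() are the devlink instance-lock helpers this API expects to be held.

/*
 * Hypothetical driver-side sketch (not part of this file): build a tiny
 * rate hierarchy with the helpers exported above.
 */
#include <net/devlink.h>

static int foo_register_rates(struct devlink *devlink,
			      struct devlink_port *port,
			      void *group_priv, void *leaf_priv)
{
	char group_name[] = "group0";	/* illustrative node name */
	struct devlink_rate *group;
	int err;

	devl_lock(devlink);

	/* A node ("group") that leaves can be parented under. */
	group = devl_rate_node_create(devlink, group_priv, group_name, NULL);
	if (IS_ERR(group)) {
		err = PTR_ERR(group);
		goto out_unlock;
	}

	/* A leaf bound to the port, parented under the node. */
	err = devl_rate_leaf_create(port, leaf_priv, group);
out_unlock:
	devl_unlock(devlink);
	return err;
}

static void foo_unregister_rates(struct devlink *devlink,
				 struct devlink_port *port)
{
	devl_lock(devlink);
	devl_rate_leaf_destroy(port);		/* leaves first ...         */
	devl_rate_nodes_destroy(devlink);	/* ... then remaining nodes */
	devl_unlock(devlink);
}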
| 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C)2006 USAGI/WIDE Project * * Author: * Masahide NAKAMURA @USAGI <masahide.nakamura.cz@hitachi.com> * * Based on net/netfilter/xt_tcpudp.c */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/module.h> #include <net/ip.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <net/mip6.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6t_mh.h> MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match"); MODULE_LICENSE("GPL"); /* Returns 1 if the type is matched by the range, 0 otherwise */ static inline bool type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) { return (type >= min && type <= max) ^ invert; } static bool mh_mt6(const struct sk_buff *skb, struct xt_action_param *par) { struct ip6_mh _mh; const struct ip6_mh *mh; const struct ip6t_mh *mhinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; mh = skb_header_pointer(skb, par->thoff, sizeof(_mh), &_mh); if (mh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ pr_debug("Dropping evil MH tinygram.\n"); par->hotdrop = true; return false; } if (mh->ip6mh_proto != IPPROTO_NONE) { pr_debug("Dropping invalid MH Payload Proto: %u\n", mh->ip6mh_proto); par->hotdrop = true; return false; } return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type, !!(mhinfo->invflags & IP6T_MH_INV_TYPE)); } static int mh_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_mh *mhinfo = par->matchinfo; /* Must specify no unknown invflags */ return (mhinfo->invflags & ~IP6T_MH_INV_MASK) ? -EINVAL : 0; } static struct xt_match mh_mt6_reg __read_mostly = { .name = "mh", .family = NFPROTO_IPV6, .checkentry = mh_mt6_check, .match = mh_mt6, .matchsize = sizeof(struct ip6t_mh), .proto = IPPROTO_MH, .me = THIS_MODULE, }; static int __init mh_mt6_init(void) { return xt_register_match(&mh_mt6_reg); } static void __exit mh_mt6_exit(void) { xt_unregister_match(&mh_mt6_reg); } module_init(mh_mt6_init); module_exit(mh_mt6_exit); |
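Both this match and the companion AH match reduce to the same inverted-range test. The tiny standalone program below (plain C, an illustration rather than kernel code) spells out how the XOR with the invert flag turns a rule such as "--mh-type ! 2:5" into the exact complement of "--mh-type 2:5".

/*
 * Standalone illustration of the range-with-invert test used by
 * type_match()/spi_match(): the in-range result is XORed with the
 * invert flag.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool range_match(uint8_t min, uint8_t max, uint8_t v, bool invert)
{
	return (v >= min && v <= max) ^ invert;
}

int main(void)
{
	assert(range_match(2, 5, 4, false));	/* 4 lies in [2,5]        */
	assert(!range_match(2, 5, 4, true));	/* inverted: 4 rejected   */
	assert(range_match(2, 5, 7, true));	/* inverted: 7 accepted   */
	return 0;
}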
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Queue of folios definitions * * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * See: * * Documentation/core-api/folio_queue.rst * * for a description of the API. */ #ifndef _LINUX_FOLIO_QUEUE_H #define _LINUX_FOLIO_QUEUE_H #include <linux/pagevec.h> #include <linux/mm.h> /* * Segment in a queue of running buffers. Each segment can hold a number of * folios and a portion of the queue can be referenced with the ITER_FOLIOQ * iterator. The possibility exists of inserting non-folio elements into the * queue (such as gaps). * * Explicit prev and next pointers are used instead of a list_head to make it * easier to add segments to tail and remove them from the head without the * need for a lock. */ struct folio_queue { struct folio_batch vec; /* Folios in the queue segment */ u8 orders[PAGEVEC_SIZE]; /* Order of each folio */ struct folio_queue *next; /* Next queue segment or NULL */ struct folio_queue *prev; /* Previous queue segment of NULL */ unsigned long marks; /* 1-bit mark per folio */ unsigned long marks2; /* Second 1-bit mark per folio */ #if PAGEVEC_SIZE > BITS_PER_LONG #error marks is not big enough #endif unsigned int rreq_id; unsigned int debug_id; }; /** * folioq_init - Initialise a folio queue segment * @folioq: The segment to initialise * @rreq_id: The request identifier to use in tracelines. * * Initialise a folio queue segment and set an identifier to be used in traces. * * Note that the folio pointers are left uninitialised. */ static inline void folioq_init(struct folio_queue *folioq, unsigned int rreq_id) { folio_batch_init(&folioq->vec); folioq->next = NULL; folioq->prev = NULL; folioq->marks = 0; folioq->marks2 = 0; folioq->rreq_id = rreq_id; folioq->debug_id = 0; } /** * folioq_nr_slots: Query the capacity of a folio queue segment * @folioq: The segment to query * * Query the number of folios that a particular folio queue segment might hold. * [!] NOTE: This must not be assumed to be the same for every segment! */ static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) { return PAGEVEC_SIZE; } /** * folioq_count: Query the occupancy of a folio queue segment * @folioq: The segment to query * * Query the number of folios that have been added to a folio queue segment. * Note that this is not decreased as folios are removed from a segment. 
*/ static inline unsigned int folioq_count(struct folio_queue *folioq) { return folio_batch_count(&folioq->vec); } /** * folioq_full: Query if a folio queue segment is full * @folioq: The segment to query * * Query if a folio queue segment is fully occupied. Note that this does not * change if folios are removed from a segment. */ static inline bool folioq_full(struct folio_queue *folioq) { //return !folio_batch_space(&folioq->vec); return folioq_count(folioq) >= folioq_nr_slots(folioq); } /** * folioq_is_marked: Check first folio mark in a folio queue segment * @folioq: The segment to query * @slot: The slot number of the folio to query * * Determine if the first mark is set for the folio in the specified slot in a * folio queue segment. */ static inline bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot) { return test_bit(slot, &folioq->marks); } /** * folioq_mark: Set the first mark on a folio in a folio queue segment * @folioq: The segment to modify * @slot: The slot number of the folio to modify * * Set the first mark for the folio in the specified slot in a folio queue * segment. */ static inline void folioq_mark(struct folio_queue *folioq, unsigned int slot) { set_bit(slot, &folioq->marks); } /** * folioq_unmark: Clear the first mark on a folio in a folio queue segment * @folioq: The segment to modify * @slot: The slot number of the folio to modify * * Clear the first mark for the folio in the specified slot in a folio queue * segment. */ static inline void folioq_unmark(struct folio_queue *folioq, unsigned int slot) { clear_bit(slot, &folioq->marks); } /** * folioq_is_marked2: Check second folio mark in a folio queue segment * @folioq: The segment to query * @slot: The slot number of the folio to query * * Determine if the second mark is set for the folio in the specified slot in a * folio queue segment. */ static inline bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot) { return test_bit(slot, &folioq->marks2); } /** * folioq_mark2: Set the second mark on a folio in a folio queue segment * @folioq: The segment to modify * @slot: The slot number of the folio to modify * * Set the second mark for the folio in the specified slot in a folio queue * segment. */ static inline void folioq_mark2(struct folio_queue *folioq, unsigned int slot) { set_bit(slot, &folioq->marks2); } /** * folioq_unmark2: Clear the second mark on a folio in a folio queue segment * @folioq: The segment to modify * @slot: The slot number of the folio to modify * * Clear the second mark for the folio in the specified slot in a folio queue * segment. */ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) { clear_bit(slot, &folioq->marks2); } /** * folioq_append: Add a folio to a folio queue segment * @folioq: The segment to add to * @folio: The folio to add * * Add a folio to the tail of the sequence in a folio queue segment, increasing * the occupancy count and returning the slot number for the folio just added. * The folio size is extracted and stored in the queue and the marks are left * unmodified. * * Note that it's left up to the caller to check that the segment capacity will * not be exceeded and to extend the queue. 
*/ static inline unsigned int folioq_append(struct folio_queue *folioq, struct folio *folio) { unsigned int slot = folioq->vec.nr++; folioq->vec.folios[slot] = folio; folioq->orders[slot] = folio_order(folio); return slot; } /** * folioq_append_mark: Add a folio to a folio queue segment * @folioq: The segment to add to * @folio: The folio to add * * Add a folio to the tail of the sequence in a folio queue segment, increasing * the occupancy count and returning the slot number for the folio just added. * The folio size is extracted and stored in the queue, the first mark is set * and and the second and third marks are left unmodified. * * Note that it's left up to the caller to check that the segment capacity will * not be exceeded and to extend the queue. */ static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct folio *folio) { unsigned int slot = folioq->vec.nr++; folioq->vec.folios[slot] = folio; folioq->orders[slot] = folio_order(folio); folioq_mark(folioq, slot); return slot; } /** * folioq_folio: Get a folio from a folio queue segment * @folioq: The segment to access * @slot: The folio slot to access * * Retrieve the folio in the specified slot from a folio queue segment. Note * that no bounds check is made and if the slot hasn't been added into yet, the * pointer will be undefined. If the slot has been cleared, NULL will be * returned. */ static inline struct folio *folioq_folio(const struct folio_queue *folioq, unsigned int slot) { return folioq->vec.folios[slot]; } /** * folioq_folio_order: Get the order of a folio from a folio queue segment * @folioq: The segment to access * @slot: The folio slot to access * * Retrieve the order of the folio in the specified slot from a folio queue * segment. Note that no bounds check is made and if the slot hasn't been * added into yet, the order returned will be 0. */ static inline unsigned int folioq_folio_order(const struct folio_queue *folioq, unsigned int slot) { return folioq->orders[slot]; } /** * folioq_folio_size: Get the size of a folio from a folio queue segment * @folioq: The segment to access * @slot: The folio slot to access * * Retrieve the size of the folio in the specified slot from a folio queue * segment. Note that no bounds check is made and if the slot hasn't been * added into yet, the size returned will be PAGE_SIZE. */ static inline size_t folioq_folio_size(const struct folio_queue *folioq, unsigned int slot) { return PAGE_SIZE << folioq_folio_order(folioq, slot); } /** * folioq_clear: Clear a folio from a folio queue segment * @folioq: The segment to clear * @slot: The folio slot to clear * * Clear a folio from a sequence in a folio queue segment and clear its marks. * The occupancy count is left unchanged. */ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) { folioq->vec.folios[slot] = NULL; folioq_unmark(folioq, slot); folioq_unmark2(folioq, slot); } #endif /* _LINUX_FOLIO_QUEUE_H */ |
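Since the kernel-doc above describes each accessor in isolation, a short sketch of how they compose may help. This is a hypothetical caller, not code from the header: the foo_* helpers, the "folios" array and the skipped error unwinding are assumptions, and kernel context with <linux/slab.h> is presumed.

/*
 * Minimal usage sketch: chain folio_queue segments, then total up the
 * bytes they describe.
 */
#include <linux/folio_queue.h>
#include <linux/slab.h>

static struct folio_queue *foo_queue_folios(struct folio **folios,
					    unsigned int nr)
{
	struct folio_queue *head, *fq;
	unsigned int i;

	head = kmalloc(sizeof(*head), GFP_KERNEL);
	if (!head)
		return NULL;
	folioq_init(head, 0);

	fq = head;
	for (i = 0; i < nr; i++) {
		if (folioq_full(fq)) {
			/* Segment full: link a fresh one and move on. */
			struct folio_queue *next = kmalloc(sizeof(*next), GFP_KERNEL);

			if (!next)
				break;	/* error unwinding elided */
			folioq_init(next, 0);
			next->prev = fq;
			fq->next = next;
			fq = next;
		}
		folioq_append(fq, folios[i]);
	}
	return head;
}

static size_t foo_queue_bytes(struct folio_queue *fq)
{
	size_t bytes = 0;
	unsigned int slot;

	for (; fq; fq = fq->next)
		for (slot = 0; slot < folioq_count(fq); slot++)
			bytes += folioq_folio_size(fq, slot);
	return bytes;
}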
| // SPDX-License-Identifier: GPL-2.0 /* Watch queue and general notification mechanism, built on pipes * * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com) * * See Documentation/core-api/watch_queue.rst */ #define pr_fmt(fmt) "watchq: " fmt #include <linux/module.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/printk.h> #include <linux/miscdevice.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/poll.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/file.h> #include <linux/security.h> #include <linux/cred.h> #include <linux/sched/signal.h> #include <linux/watch_queue.h> #include <linux/pipe_fs_i.h> MODULE_DESCRIPTION("Watch queue"); MODULE_AUTHOR("Red Hat, Inc."); #define WATCH_QUEUE_NOTE_SIZE 128 #define WATCH_QUEUE_NOTES_PER_PAGE (PAGE_SIZE / WATCH_QUEUE_NOTE_SIZE) /* * This must be called under the RCU read-lock, which makes * sure that the wqueue still exists. It can then take the lock, * and check that the wqueue hasn't been destroyed, which in * turn makes sure that the notification pipe still exists. */ static inline bool lock_wqueue(struct watch_queue *wqueue) { spin_lock_bh(&wqueue->lock); if (unlikely(!wqueue->pipe)) { spin_unlock_bh(&wqueue->lock); return false; } return true; } static inline void unlock_wqueue(struct watch_queue *wqueue) { spin_unlock_bh(&wqueue->lock); } static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct watch_queue *wqueue = (struct watch_queue *)buf->private; struct page *page; unsigned int bit; /* We need to work out which note within the page this refers to, but * the note might have been maximum size, so merely ANDing the offset * off doesn't work. OTOH, the note must've been more than zero size. */ bit = buf->offset + buf->len; if ((bit & (WATCH_QUEUE_NOTE_SIZE - 1)) == 0) bit -= WATCH_QUEUE_NOTE_SIZE; bit /= WATCH_QUEUE_NOTE_SIZE; page = buf->page; bit += page->private; set_bit(bit, wqueue->notes_bitmap); generic_pipe_buf_release(pipe, buf); } // No try_steal function => no stealing #define watch_queue_pipe_buf_try_steal NULL /* New data written to a pipe may be appended to a buffer with this type. */ static const struct pipe_buf_operations watch_queue_pipe_buf_ops = { .release = watch_queue_pipe_buf_release, .try_steal = watch_queue_pipe_buf_try_steal, .get = generic_pipe_buf_get, }; /* * Post a notification to a watch queue. * * Must be called with the RCU lock for reading, and the * watch_queue lock held, which guarantees that the pipe * hasn't been released. 
*/ static bool post_one_notification(struct watch_queue *wqueue, struct watch_notification *n) { void *p; struct pipe_inode_info *pipe = wqueue->pipe; struct pipe_buffer *buf; struct page *page; unsigned int head, tail, note, offset, len; bool done = false; spin_lock_irq(&pipe->rd_wait.lock); head = pipe->head; tail = pipe->tail; if (pipe_full(head, tail, pipe->ring_size)) goto lost; note = find_first_bit(wqueue->notes_bitmap, wqueue->nr_notes); if (note >= wqueue->nr_notes) goto lost; page = wqueue->notes[note / WATCH_QUEUE_NOTES_PER_PAGE]; offset = note % WATCH_QUEUE_NOTES_PER_PAGE * WATCH_QUEUE_NOTE_SIZE; get_page(page); len = n->info & WATCH_INFO_LENGTH; p = kmap_atomic(page); memcpy(p + offset, n, len); kunmap_atomic(p); buf = pipe_buf(pipe, head); buf->page = page; buf->private = (unsigned long)wqueue; buf->ops = &watch_queue_pipe_buf_ops; buf->offset = offset; buf->len = len; buf->flags = PIPE_BUF_FLAG_WHOLE; smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */ if (!test_and_clear_bit(note, wqueue->notes_bitmap)) { spin_unlock_irq(&pipe->rd_wait.lock); BUG(); } wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); done = true; out: spin_unlock_irq(&pipe->rd_wait.lock); if (done) kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); return done; lost: buf = pipe_buf(pipe, head - 1); buf->flags |= PIPE_BUF_FLAG_LOSS; goto out; } /* * Apply filter rules to a notification. */ static bool filter_watch_notification(const struct watch_filter *wf, const struct watch_notification *n) { const struct watch_type_filter *wt; unsigned int st_bits = sizeof(wt->subtype_filter[0]) * 8; unsigned int st_index = n->subtype / st_bits; unsigned int st_bit = 1U << (n->subtype % st_bits); int i; if (!test_bit(n->type, wf->type_filter)) return false; for (i = 0; i < wf->nr_filters; i++) { wt = &wf->filters[i]; if (n->type == wt->type && (wt->subtype_filter[st_index] & st_bit) && (n->info & wt->info_mask) == wt->info_filter) return true; } return false; /* If there is a filter, the default is to reject. */ } /** * __post_watch_notification - Post an event notification * @wlist: The watch list to post the event to. * @n: The notification record to post. * @cred: The creds of the process that triggered the notification. * @id: The ID to match on the watch. * * Post a notification of an event into a set of watch queues and let the users * know. * * The size of the notification should be set in n->info & WATCH_INFO_LENGTH and * should be in units of sizeof(*n). */ void __post_watch_notification(struct watch_list *wlist, struct watch_notification *n, const struct cred *cred, u64 id) { const struct watch_filter *wf; struct watch_queue *wqueue; struct watch *watch; if (((n->info & WATCH_INFO_LENGTH) >> WATCH_INFO_LENGTH__SHIFT) == 0) { WARN_ON(1); return; } rcu_read_lock(); hlist_for_each_entry_rcu(watch, &wlist->watchers, list_node) { if (watch->id != id) continue; n->info &= ~WATCH_INFO_ID; n->info |= watch->info_id; wqueue = rcu_dereference(watch->queue); wf = rcu_dereference(wqueue->filter); if (wf && !filter_watch_notification(wf, n)) continue; if (security_post_notification(watch->cred, cred, n) < 0) continue; if (lock_wqueue(wqueue)) { post_one_notification(wqueue, n); unlock_wqueue(wqueue); } } rcu_read_unlock(); } EXPORT_SYMBOL(__post_watch_notification); /* * Allocate sufficient pages to preallocation for the requested number of * notifications. 
*/ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes) { struct watch_queue *wqueue = pipe->watch_queue; struct page **pages; unsigned long *bitmap; unsigned long user_bufs; int ret, i, nr_pages; if (!wqueue) return -ENODEV; if (wqueue->notes) return -EBUSY; if (nr_notes < 1 || nr_notes > 512) /* TODO: choose a better hard limit */ return -EINVAL; nr_pages = (nr_notes + WATCH_QUEUE_NOTES_PER_PAGE - 1); nr_pages /= WATCH_QUEUE_NOTES_PER_PAGE; user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_pages); if (nr_pages > pipe->max_usage && (too_many_pipe_buffers_hard(user_bufs) || too_many_pipe_buffers_soft(user_bufs)) && pipe_is_unprivileged_user()) { ret = -EPERM; goto error; } nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE; ret = pipe_resize_ring(pipe, roundup_pow_of_two(nr_notes)); if (ret < 0) goto error; /* * pipe_resize_ring() does not update nr_accounted for watch_queue * pipes, because the above vastly overprovisions. Set nr_accounted on * and max_usage this pipe to the number that was actually charged to * the user above via account_pipe_buffers. */ pipe->max_usage = nr_pages; pipe->nr_accounted = nr_pages; ret = -ENOMEM; pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); if (!pages) goto error; for (i = 0; i < nr_pages; i++) { pages[i] = alloc_page(GFP_KERNEL); if (!pages[i]) goto error_p; pages[i]->private = i * WATCH_QUEUE_NOTES_PER_PAGE; } bitmap = bitmap_alloc(nr_notes, GFP_KERNEL); if (!bitmap) goto error_p; bitmap_fill(bitmap, nr_notes); wqueue->notes = pages; wqueue->notes_bitmap = bitmap; wqueue->nr_pages = nr_pages; wqueue->nr_notes = nr_notes; return 0; error_p: while (--i >= 0) __free_page(pages[i]); kfree(pages); error: (void) account_pipe_buffers(pipe->user, nr_pages, pipe->nr_accounted); return ret; } /* * Set the filter on a watch queue. */ long watch_queue_set_filter(struct pipe_inode_info *pipe, struct watch_notification_filter __user *_filter) { struct watch_notification_type_filter *tf; struct watch_notification_filter filter; struct watch_type_filter *q; struct watch_filter *wfilter; struct watch_queue *wqueue = pipe->watch_queue; int ret, nr_filter = 0, i; if (!wqueue) return -ENODEV; if (!_filter) { /* Remove the old filter */ wfilter = NULL; goto set; } /* Grab the user's filter specification */ if (copy_from_user(&filter, _filter, sizeof(filter)) != 0) return -EFAULT; if (filter.nr_filters == 0 || filter.nr_filters > 16 || filter.__reserved != 0) return -EINVAL; tf = memdup_array_user(_filter->filters, filter.nr_filters, sizeof(*tf)); if (IS_ERR(tf)) return PTR_ERR(tf); ret = -EINVAL; for (i = 0; i < filter.nr_filters; i++) { if ((tf[i].info_filter & ~tf[i].info_mask) || tf[i].info_mask & WATCH_INFO_LENGTH) goto err_filter; /* Ignore any unknown types */ if (tf[i].type >= WATCH_TYPE__NR) continue; nr_filter++; } /* Now we need to build the internal filter from only the relevant * user-specified filters. 
*/ ret = -ENOMEM; wfilter = kzalloc(struct_size(wfilter, filters, nr_filter), GFP_KERNEL); if (!wfilter) goto err_filter; wfilter->nr_filters = nr_filter; q = wfilter->filters; for (i = 0; i < filter.nr_filters; i++) { if (tf[i].type >= WATCH_TYPE__NR) continue; q->type = tf[i].type; q->info_filter = tf[i].info_filter; q->info_mask = tf[i].info_mask; q->subtype_filter[0] = tf[i].subtype_filter[0]; __set_bit(q->type, wfilter->type_filter); q++; } kfree(tf); set: pipe_lock(pipe); wfilter = rcu_replace_pointer(wqueue->filter, wfilter, lockdep_is_held(&pipe->mutex)); pipe_unlock(pipe); if (wfilter) kfree_rcu(wfilter, rcu); return 0; err_filter: kfree(tf); return ret; } static void __put_watch_queue(struct kref *kref) { struct watch_queue *wqueue = container_of(kref, struct watch_queue, usage); struct watch_filter *wfilter; int i; for (i = 0; i < wqueue->nr_pages; i++) __free_page(wqueue->notes[i]); kfree(wqueue->notes); bitmap_free(wqueue->notes_bitmap); wfilter = rcu_access_pointer(wqueue->filter); if (wfilter) kfree_rcu(wfilter, rcu); kfree_rcu(wqueue, rcu); } /** * put_watch_queue - Dispose of a ref on a watchqueue. * @wqueue: The watch queue to unref. */ void put_watch_queue(struct watch_queue *wqueue) { kref_put(&wqueue->usage, __put_watch_queue); } EXPORT_SYMBOL(put_watch_queue); static void free_watch(struct rcu_head *rcu) { struct watch *watch = container_of(rcu, struct watch, rcu); put_watch_queue(rcu_access_pointer(watch->queue)); atomic_dec(&watch->cred->user->nr_watches); put_cred(watch->cred); kfree(watch); } static void __put_watch(struct kref *kref) { struct watch *watch = container_of(kref, struct watch, usage); call_rcu(&watch->rcu, free_watch); } /* * Discard a watch. */ static void put_watch(struct watch *watch) { kref_put(&watch->usage, __put_watch); } /** * init_watch - Initialise a watch * @watch: The watch to initialise. * @wqueue: The queue to assign. * * Initialise a watch and set the watch queue. */ void init_watch(struct watch *watch, struct watch_queue *wqueue) { kref_init(&watch->usage); INIT_HLIST_NODE(&watch->list_node); INIT_HLIST_NODE(&watch->queue_node); rcu_assign_pointer(watch->queue, wqueue); } static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue) { const struct cred *cred; struct watch *w; hlist_for_each_entry(w, &wlist->watchers, list_node) { struct watch_queue *wq = rcu_access_pointer(w->queue); if (wqueue == wq && watch->id == w->id) return -EBUSY; } cred = current_cred(); if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) { atomic_dec(&cred->user->nr_watches); return -EAGAIN; } watch->cred = get_cred(cred); rcu_assign_pointer(watch->watch_list, wlist); kref_get(&wqueue->usage); kref_get(&watch->usage); hlist_add_head(&watch->queue_node, &wqueue->watches); hlist_add_head_rcu(&watch->list_node, &wlist->watchers); return 0; } /** * add_watch_to_object - Add a watch on an object to a watch list * @watch: The watch to add * @wlist: The watch list to add to * * @watch->queue must have been set to point to the queue to post notifications * to and the watch list of the object to be watched. @watch->cred must also * have been set to the appropriate credentials and a ref taken on them. * * The caller must pin the queue and the list both and must hold the list * locked against racing watch additions/removals. 
*/ int add_watch_to_object(struct watch *watch, struct watch_list *wlist) { struct watch_queue *wqueue; int ret = -ENOENT; rcu_read_lock(); wqueue = rcu_access_pointer(watch->queue); if (lock_wqueue(wqueue)) { spin_lock(&wlist->lock); ret = add_one_watch(watch, wlist, wqueue); spin_unlock(&wlist->lock); unlock_wqueue(wqueue); } rcu_read_unlock(); return ret; } EXPORT_SYMBOL(add_watch_to_object); /** * remove_watch_from_object - Remove a watch or all watches from an object. * @wlist: The watch list to remove from * @wq: The watch queue of interest (ignored if @all is true) * @id: The ID of the watch to remove (ignored if @all is true) * @all: True to remove all objects * * Remove a specific watch or all watches from an object. A notification is * sent to the watcher to tell them that this happened. */ int remove_watch_from_object(struct watch_list *wlist, struct watch_queue *wq, u64 id, bool all) { struct watch_notification_removal n; struct watch_queue *wqueue; struct watch *watch; int ret = -EBADSLT; rcu_read_lock(); again: spin_lock(&wlist->lock); hlist_for_each_entry(watch, &wlist->watchers, list_node) { if (all || (watch->id == id && rcu_access_pointer(watch->queue) == wq)) goto found; } spin_unlock(&wlist->lock); goto out; found: ret = 0; hlist_del_init_rcu(&watch->list_node); rcu_assign_pointer(watch->watch_list, NULL); spin_unlock(&wlist->lock); /* We now own the reference on watch that used to belong to wlist. */ n.watch.type = WATCH_TYPE_META; n.watch.subtype = WATCH_META_REMOVAL_NOTIFICATION; n.watch.info = watch->info_id | watch_sizeof(n.watch); n.id = id; if (id != 0) n.watch.info = watch->info_id | watch_sizeof(n); wqueue = rcu_dereference(watch->queue); if (lock_wqueue(wqueue)) { post_one_notification(wqueue, &n.watch); if (!hlist_unhashed(&watch->queue_node)) { hlist_del_init_rcu(&watch->queue_node); put_watch(watch); } unlock_wqueue(wqueue); } if (wlist->release_watch) { void (*release_watch)(struct watch *); release_watch = wlist->release_watch; rcu_read_unlock(); (*release_watch)(watch); rcu_read_lock(); } put_watch(watch); if (all && !hlist_empty(&wlist->watchers)) goto again; out: rcu_read_unlock(); return ret; } EXPORT_SYMBOL(remove_watch_from_object); /* * Remove all the watches that are contributory to a queue. This has the * potential to race with removal of the watches by the destruction of the * objects being watched or with the distribution of notifications. */ void watch_queue_clear(struct watch_queue *wqueue) { struct watch_list *wlist; struct watch *watch; bool release; rcu_read_lock(); spin_lock_bh(&wqueue->lock); /* * This pipe can be freed by callers like free_pipe_info(). * Removing this reference also prevents new notifications. */ wqueue->pipe = NULL; while (!hlist_empty(&wqueue->watches)) { watch = hlist_entry(wqueue->watches.first, struct watch, queue_node); hlist_del_init_rcu(&watch->queue_node); /* We now own a ref on the watch. */ spin_unlock_bh(&wqueue->lock); /* We can't do the next bit under the queue lock as we need to * get the list lock - which would cause a deadlock if someone * was removing from the opposite direction at the same time or * posting a notification. */ wlist = rcu_dereference(watch->watch_list); if (wlist) { void (*release_watch)(struct watch *); spin_lock(&wlist->lock); release = !hlist_unhashed(&watch->list_node); if (release) { hlist_del_init_rcu(&watch->list_node); rcu_assign_pointer(watch->watch_list, NULL); /* We now own a second ref on the watch. 
*/ } release_watch = wlist->release_watch; spin_unlock(&wlist->lock); if (release) { if (release_watch) { rcu_read_unlock(); /* This might need to call dput(), so * we have to drop all the locks. */ (*release_watch)(watch); rcu_read_lock(); } put_watch(watch); } } put_watch(watch); spin_lock_bh(&wqueue->lock); } spin_unlock_bh(&wqueue->lock); rcu_read_unlock(); } /** * get_watch_queue - Get a watch queue from its file descriptor. * @fd: The fd to query. */ struct watch_queue *get_watch_queue(int fd) { struct pipe_inode_info *pipe; struct watch_queue *wqueue = ERR_PTR(-EINVAL); CLASS(fd, f)(fd); if (!fd_empty(f)) { pipe = get_pipe_info(fd_file(f), false); if (pipe && pipe->watch_queue) { wqueue = pipe->watch_queue; kref_get(&wqueue->usage); } } return wqueue; } EXPORT_SYMBOL(get_watch_queue); /* * Initialise a watch queue */ int watch_queue_init(struct pipe_inode_info *pipe) { struct watch_queue *wqueue; wqueue = kzalloc(sizeof(*wqueue), GFP_KERNEL); if (!wqueue) return -ENOMEM; wqueue->pipe = pipe; kref_init(&wqueue->usage); spin_lock_init(&wqueue->lock); INIT_HLIST_HEAD(&wqueue->watches); pipe->watch_queue = wqueue; return 0; } |
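The consumer side of all of the above lives in userspace. The sketch below is loosely modelled on Documentation/core-api/watch_queue.rst and the samples/watch_queue/ example (hedged, not copied from either): it opens a notification pipe, sizes its note buffer via the ioctl that ends up in watch_queue_set_size() above, and then drains packed struct watch_notification records from the pipe.

/*
 * Userspace sketch: create a notification pipe and read records from it.
 * Registering actual watches (e.g. via keyctl()) is left out.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/watch_queue.h>

int main(void)
{
	unsigned char buf[4096];
	ssize_t n;
	int fds[2];

	if (pipe2(fds, O_NOTIFICATION_PIPE) == -1 ||
	    ioctl(fds[0], IOC_WATCH_QUEUE_SET_SIZE, 256) == -1) {
		perror("notification pipe");
		return 1;
	}

	/* ... register watches against fds[0] here ... */

	while ((n = read(fds[0], buf, sizeof(buf))) > 0) {
		unsigned char *p = buf, *end = buf + n;

		while (p < end) {
			const struct watch_notification *wn = (const void *)p;
			size_t len = (wn->info & WATCH_INFO_LENGTH) >>
				     WATCH_INFO_LENGTH__SHIFT;

			if (len < sizeof(*wn) || p + len > end)
				break;	/* malformed record; bail out */
			printf("type=%u subtype=%u\n",
			       (unsigned int)wn->type,
			       (unsigned int)wn->subtype);
			p += len;	/* advance by the encoded record length */
		}
	}
	return 0;
}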
| // SPDX-License-Identifier: GPL-2.0 or MIT /* * Copyright (C) 2016 Noralf Trønnes * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ #include <linux/io.h> #include <linux/iosys-map.h> #include <linux/module.h> #include <linux/slab.h> #include <drm/drm_device.h> #include <drm/drm_format_helper.h> #include <drm/drm_framebuffer.h> #include <drm/drm_fourcc.h> #include <drm/drm_print.h> #include <drm/drm_rect.h> #include "drm_format_internal.h" /** * drm_format_conv_state_init - Initialize format-conversion state * @state: The state to initialize * * Clears all fields in struct drm_format_conv_state. The state will * be empty with no preallocated resources.
*/ void drm_format_conv_state_init(struct drm_format_conv_state *state) { state->tmp.mem = NULL; state->tmp.size = 0; state->tmp.preallocated = false; } EXPORT_SYMBOL(drm_format_conv_state_init); /** * drm_format_conv_state_copy - Copy format-conversion state * @state: Destination state * @old_state: Source state * * Copies format-conversion state from @old_state to @state; except for * temporary storage. */ void drm_format_conv_state_copy(struct drm_format_conv_state *state, const struct drm_format_conv_state *old_state) { /* * So far, there's only temporary storage here, which we don't * duplicate. Just clear the fields. */ state->tmp.mem = NULL; state->tmp.size = 0; state->tmp.preallocated = false; } EXPORT_SYMBOL(drm_format_conv_state_copy); /** * drm_format_conv_state_reserve - Allocates storage for format conversion * @state: The format-conversion state * @new_size: The minimum allocation size * @flags: Flags for kmalloc() * * Allocates at least @new_size bytes and returns a pointer to the memory * range. After calling this function, previously returned memory blocks * are invalid. It's best to collect all memory requirements of a format * conversion and call this function once to allocate the range. * * Returns: * A pointer to the allocated memory range, or NULL otherwise. */ void *drm_format_conv_state_reserve(struct drm_format_conv_state *state, size_t new_size, gfp_t flags) { void *mem; if (new_size <= state->tmp.size) goto out; else if (state->tmp.preallocated) return NULL; mem = krealloc(state->tmp.mem, new_size, flags); if (!mem) return NULL; state->tmp.mem = mem; state->tmp.size = new_size; out: return state->tmp.mem; } EXPORT_SYMBOL(drm_format_conv_state_reserve); /** * drm_format_conv_state_release - Releases an format-conversion storage * @state: The format-conversion state * * Releases the memory range references by the format-conversion state. * After this call, all pointers to the memory are invalid. Prefer * drm_format_conv_state_init() for cleaning up and unloading a driver. */ void drm_format_conv_state_release(struct drm_format_conv_state *state) { if (state->tmp.preallocated) return; kfree(state->tmp.mem); state->tmp.mem = NULL; state->tmp.size = 0; } EXPORT_SYMBOL(drm_format_conv_state_release); static unsigned int clip_offset(const struct drm_rect *clip, unsigned int pitch, unsigned int cpp) { return clip->y1 * pitch + clip->x1 * cpp; } /** * drm_fb_clip_offset - Returns the clipping rectangles byte-offset in a framebuffer * @pitch: Framebuffer line pitch in byte * @format: Framebuffer format * @clip: Clip rectangle * * Returns: * The byte offset of the clip rectangle's top-left corner within the framebuffer. */ unsigned int drm_fb_clip_offset(unsigned int pitch, const struct drm_format_info *format, const struct drm_rect *clip) { return clip_offset(clip, pitch, format->cpp[0]); } EXPORT_SYMBOL(drm_fb_clip_offset); /* TODO: Make this function work with multi-plane formats. 
*/ static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_pixsize, const void *vaddr, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool vaddr_cached_hint, struct drm_format_conv_state *state, void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels)) { unsigned long linepixels = drm_rect_width(clip); unsigned long lines = drm_rect_height(clip); size_t sbuf_len = linepixels * fb->format->cpp[0]; void *stmp = NULL; unsigned long i; const void *sbuf; /* * Some source buffers, such as DMA memory, use write-combine * caching, so reads are uncached. Speed up access by fetching * one line at a time. */ if (!vaddr_cached_hint) { stmp = drm_format_conv_state_reserve(state, sbuf_len, GFP_KERNEL); if (!stmp) return -ENOMEM; } if (!dst_pitch) dst_pitch = drm_rect_width(clip) * dst_pixsize; vaddr += clip_offset(clip, fb->pitches[0], fb->format->cpp[0]); for (i = 0; i < lines; ++i) { if (stmp) sbuf = memcpy(stmp, vaddr, sbuf_len); else sbuf = vaddr; xfrm_line(dst, sbuf, linepixels); vaddr += fb->pitches[0]; dst += dst_pitch; } return 0; } /* TODO: Make this function work with multi-plane formats. */ static int __drm_fb_xfrm_toio(void __iomem *dst, unsigned long dst_pitch, unsigned long dst_pixsize, const void *vaddr, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool vaddr_cached_hint, struct drm_format_conv_state *state, void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels)) { unsigned long linepixels = drm_rect_width(clip); unsigned long lines = drm_rect_height(clip); size_t dbuf_len = linepixels * dst_pixsize; size_t stmp_off = round_up(dbuf_len, ARCH_KMALLOC_MINALIGN); /* for sbuf alignment */ size_t sbuf_len = linepixels * fb->format->cpp[0]; void *stmp = NULL; unsigned long i; const void *sbuf; void *dbuf; if (vaddr_cached_hint) { dbuf = drm_format_conv_state_reserve(state, dbuf_len, GFP_KERNEL); } else { dbuf = drm_format_conv_state_reserve(state, stmp_off + sbuf_len, GFP_KERNEL); stmp = dbuf + stmp_off; } if (!dbuf) return -ENOMEM; if (!dst_pitch) dst_pitch = linepixels * dst_pixsize; vaddr += clip_offset(clip, fb->pitches[0], fb->format->cpp[0]); for (i = 0; i < lines; ++i) { if (stmp) sbuf = memcpy(stmp, vaddr, sbuf_len); else sbuf = vaddr; xfrm_line(dbuf, sbuf, linepixels); memcpy_toio(dst, dbuf, dbuf_len); vaddr += fb->pitches[0]; dst += dst_pitch; } return 0; } /* TODO: Make this function work with multi-plane formats. 
*/ static int drm_fb_xfrm(struct iosys_map *dst, const unsigned int *dst_pitch, const u8 *dst_pixsize, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool vaddr_cached_hint, struct drm_format_conv_state *state, void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels)) { static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = { 0, 0, 0, 0 }; if (!dst_pitch) dst_pitch = default_dst_pitch; /* TODO: handle src in I/O memory here */ if (dst[0].is_iomem) return __drm_fb_xfrm_toio(dst[0].vaddr_iomem, dst_pitch[0], dst_pixsize[0], src[0].vaddr, fb, clip, vaddr_cached_hint, state, xfrm_line); else return __drm_fb_xfrm(dst[0].vaddr, dst_pitch[0], dst_pixsize[0], src[0].vaddr, fb, clip, vaddr_cached_hint, state, xfrm_line); } #define ALIGN_DOWN_PIXELS(end, n, a) \ ((end) - ((n) & ((a) - 1))) static __always_inline void drm_fb_xfrm_line_32to8(void *dbuf, const void *sbuf, unsigned int pixels, u32 (*xfrm_pixel)(u32)) { __le32 *dbuf32 = dbuf; u8 *dbuf8; const __le32 *sbuf32 = sbuf; const __le32 *send32 = sbuf32 + pixels; /* write 4 pixels at once */ while (sbuf32 < ALIGN_DOWN_PIXELS(send32, pixels, 4)) { u32 pix[4] = { le32_to_cpup(sbuf32), le32_to_cpup(sbuf32 + 1), le32_to_cpup(sbuf32 + 2), le32_to_cpup(sbuf32 + 3), }; /* write output bytes in reverse order for little endianness */ u32 val32 = xfrm_pixel(pix[0]) | (xfrm_pixel(pix[1]) << 8) | (xfrm_pixel(pix[2]) << 16) | (xfrm_pixel(pix[3]) << 24); *dbuf32++ = cpu_to_le32(val32); sbuf32 += ARRAY_SIZE(pix); } /* write trailing pixels */ dbuf8 = (u8 __force *)dbuf32; while (sbuf32 < send32) *dbuf8++ = xfrm_pixel(le32_to_cpup(sbuf32++)); } static __always_inline void drm_fb_xfrm_line_32to16(void *dbuf, const void *sbuf, unsigned int pixels, u32 (*xfrm_pixel)(u32)) { __le64 *dbuf64 = dbuf; __le32 *dbuf32; __le16 *dbuf16; const __le32 *sbuf32 = sbuf; const __le32 *send32 = sbuf32 + pixels; #if defined(CONFIG_64BIT) /* write 4 pixels at once */ while (sbuf32 < ALIGN_DOWN_PIXELS(send32, pixels, 4)) { u32 pix[4] = { le32_to_cpup(sbuf32), le32_to_cpup(sbuf32 + 1), le32_to_cpup(sbuf32 + 2), le32_to_cpup(sbuf32 + 3), }; /* write output bytes in reverse order for little endianness */ u64 val64 = ((u64)xfrm_pixel(pix[0])) | ((u64)xfrm_pixel(pix[1]) << 16) | ((u64)xfrm_pixel(pix[2]) << 32) | ((u64)xfrm_pixel(pix[3]) << 48); *dbuf64++ = cpu_to_le64(val64); sbuf32 += ARRAY_SIZE(pix); } #endif /* write 2 pixels at once */ dbuf32 = (__le32 __force *)dbuf64; while (sbuf32 < ALIGN_DOWN_PIXELS(send32, pixels, 2)) { u32 pix[2] = { le32_to_cpup(sbuf32), le32_to_cpup(sbuf32 + 1), }; /* write output bytes in reverse order for little endianness */ u32 val32 = xfrm_pixel(pix[0]) | (xfrm_pixel(pix[1]) << 16); *dbuf32++ = cpu_to_le32(val32); sbuf32 += ARRAY_SIZE(pix); } /* write trailing pixel */ dbuf16 = (__le16 __force *)dbuf32; while (sbuf32 < send32) *dbuf16++ = cpu_to_le16(xfrm_pixel(le32_to_cpup(sbuf32++))); } static __always_inline void drm_fb_xfrm_line_32to24(void *dbuf, const void *sbuf, unsigned int pixels, u32 (*xfrm_pixel)(u32)) { __le32 *dbuf32 = dbuf; u8 *dbuf8; const __le32 *sbuf32 = sbuf; const __le32 *send32 = sbuf32 + pixels; /* write pixels in chunks of 4 */ while (sbuf32 < ALIGN_DOWN_PIXELS(send32, pixels, 4)) { u32 val24[4] = { xfrm_pixel(le32_to_cpup(sbuf32)), xfrm_pixel(le32_to_cpup(sbuf32 + 1)), xfrm_pixel(le32_to_cpup(sbuf32 + 2)), xfrm_pixel(le32_to_cpup(sbuf32 + 3)), }; u32 out32[3] = { /* write output bytes in reverse order for little endianness */ ((val24[0] & 0x000000ff)) | 
((val24[0] & 0x0000ff00)) | ((val24[0] & 0x00ff0000)) | ((val24[1] & 0x000000ff) << 24), ((val24[1] & 0x0000ff00) >> 8) | ((val24[1] & 0x00ff0000) >> 8) | ((val24[2] & 0x000000ff) << 16) | ((val24[2] & 0x0000ff00) << 16), ((val24[2] & 0x00ff0000) >> 16) | ((val24[3] & 0x000000ff) << 8) | ((val24[3] & 0x0000ff00) << 8) | ((val24[3] & 0x00ff0000) << 8), }; *dbuf32++ = cpu_to_le32(out32[0]); *dbuf32++ = cpu_to_le32(out32[1]); *dbuf32++ = cpu_to_le32(out32[2]); sbuf32 += ARRAY_SIZE(val24); } /* write trailing pixel */ dbuf8 = (u8 __force *)dbuf32; while (sbuf32 < send32) { u32 val24 = xfrm_pixel(le32_to_cpup(sbuf32++)); /* write output in reverse order for little endianness */ *dbuf8++ = (val24 & 0x000000ff); *dbuf8++ = (val24 & 0x0000ff00) >> 8; *dbuf8++ = (val24 & 0x00ff0000) >> 16; } } static __always_inline void drm_fb_xfrm_line_32to32(void *dbuf, const void *sbuf, unsigned int pixels, u32 (*xfrm_pixel)(u32)) { __le32 *dbuf32 = dbuf; const __le32 *sbuf32 = sbuf; const __le32 *send32 = sbuf32 + pixels; while (sbuf32 < send32) *dbuf32++ = cpu_to_le32(xfrm_pixel(le32_to_cpup(sbuf32++))); } /** * drm_fb_memcpy - Copy clip buffer * @dst: Array of destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * * This function copies parts of a framebuffer to display memory. Destination and * framebuffer formats must match. No conversion takes place. The parameters @dst, * @dst_pitch and @src refer to arrays. Each array must have at least as many entries * as there are planes in @fb's format. Each entry stores the value for the format's * respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). 
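 *
 * A minimal usage sketch (illustrative only; the shadow-plane state, damage
 * rectangle and pre-mapped destination buffers are assumptions about the
 * caller, not part of this file):
 *
 *	drm_fb_memcpy(dst, NULL, shadow_plane_state->data, plane_state->fb,
 *		      &damage);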
*/ void drm_fb_memcpy(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip) { static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = { 0, 0, 0, 0 }; const struct drm_format_info *format = fb->format; unsigned int i, y, lines = drm_rect_height(clip); if (!dst_pitch) dst_pitch = default_dst_pitch; for (i = 0; i < format->num_planes; ++i) { unsigned int bpp_i = drm_format_info_bpp(format, i); unsigned int cpp_i = DIV_ROUND_UP(bpp_i, 8); size_t len_i = DIV_ROUND_UP(drm_rect_width(clip) * bpp_i, 8); unsigned int dst_pitch_i = dst_pitch[i]; struct iosys_map dst_i = dst[i]; struct iosys_map src_i = src[i]; if (!dst_pitch_i) dst_pitch_i = len_i; iosys_map_incr(&src_i, clip_offset(clip, fb->pitches[i], cpp_i)); for (y = 0; y < lines; y++) { /* TODO: handle src_i in I/O memory here */ iosys_map_memcpy_to(&dst_i, 0, src_i.vaddr, len_i); iosys_map_incr(&src_i, fb->pitches[i]); iosys_map_incr(&dst_i, dst_pitch_i); } } } EXPORT_SYMBOL(drm_fb_memcpy); static void drm_fb_swab16_line(void *dbuf, const void *sbuf, unsigned int pixels) { u16 *dbuf16 = dbuf; const u16 *sbuf16 = sbuf; const u16 *send16 = sbuf16 + pixels; while (sbuf16 < send16) *dbuf16++ = swab16(*sbuf16++); } static void drm_fb_swab32_line(void *dbuf, const void *sbuf, unsigned int pixels) { u32 *dbuf32 = dbuf; const u32 *sbuf32 = sbuf; const u32 *send32 = sbuf32 + pixels; while (sbuf32 < send32) *dbuf32++ = swab32(*sbuf32++); } /** * drm_fb_swab - Swap bytes into clip buffer * @dst: Array of destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @cached: Source buffer is mapped cached (eg. not write-combined) * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and swaps per-pixel * bytes during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. If @cached is * false a temporary buffer is used to cache one pixel line at a time to speed up * slow uncached reads. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). 
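 *
 * A minimal usage sketch (illustrative only; the mapped destination, plane
 * state, damage rectangle and conversion state are assumed caller context):
 *
 *	drm_fb_swab(dst, NULL, shadow_plane_state->data, plane_state->fb,
 *		    &damage, false, &fmtcnv_state);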
*/ void drm_fb_swab(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool cached, struct drm_format_conv_state *state) { const struct drm_format_info *format = fb->format; u8 cpp = DIV_ROUND_UP(drm_format_info_bpp(format, 0), 8); void (*swab_line)(void *dbuf, const void *sbuf, unsigned int npixels); switch (cpp) { case 4: swab_line = drm_fb_swab32_line; break; case 2: swab_line = drm_fb_swab16_line; break; default: drm_warn_once(fb->dev, "Format %p4cc has unsupported pixel size.\n", &format->format); return; } drm_fb_xfrm(dst, dst_pitch, &cpp, src, fb, clip, cached, state, swab_line); } EXPORT_SYMBOL(drm_fb_swab); static void drm_fb_xrgb8888_to_rgb332_line(void *dbuf, const void *sbuf, unsigned int pixels) { drm_fb_xfrm_line_32to8(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_rgb332); } /** * drm_fb_xrgb8888_to_rgb332 - Convert XRGB8888 to RGB332 clip buffer * @dst: Array of RGB332 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for RGB332 devices that don't support XRGB8888 natively. */ void drm_fb_xrgb8888_to_rgb332(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 1, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_rgb332_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb332); static void drm_fb_xrgb8888_to_rgb565_line(void *dbuf, const void *sbuf, unsigned int pixels) { drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_rgb565); } static __always_inline u32 drm_xrgb8888_to_rgb565_swab(u32 pix) { return swab16(drm_pixel_xrgb8888_to_rgb565(pix)); } /* TODO: implement this helper as conversion to RGB565|BIG_ENDIAN */ static void drm_fb_xrgb8888_to_rgb565_swab_line(void *dbuf, const void *sbuf, unsigned int pixels) { drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_xrgb8888_to_rgb565_swab); } /** * drm_fb_xrgb8888_to_rgb565 - Convert XRGB8888 to RGB565 clip buffer * @dst: Array of RGB565 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * @swab: Swap bytes * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. 
Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for RGB565 devices that don't support XRGB8888 natively. */ void drm_fb_xrgb8888_to_rgb565(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state, bool swab) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels); if (swab) xfrm_line = drm_fb_xrgb8888_to_rgb565_swab_line; else xfrm_line = drm_fb_xrgb8888_to_rgb565_line; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, xfrm_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565); static void drm_fb_xrgb8888_to_xrgb1555_line(void *dbuf, const void *sbuf, unsigned int pixels) { drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_xrgb1555); } /** * drm_fb_xrgb8888_to_xrgb1555 - Convert XRGB8888 to XRGB1555 clip buffer * @dst: Array of XRGB1555 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. The parameters @dst, @dst_pitch and * @src refer to arrays. Each array must have at least as many entries as * there are planes in @fb's format. Each entry stores the value for the * format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for XRGB1555 devices that don't support * XRGB8888 natively. */ void drm_fb_xrgb8888_to_xrgb1555(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_xrgb1555_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_xrgb1555); static void drm_fb_xrgb8888_to_argb1555_line(void *dbuf, const void *sbuf, unsigned int pixels) { drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_argb1555); } /** * drm_fb_xrgb8888_to_argb1555 - Convert XRGB8888 to ARGB1555 clip buffer * @dst: Array of ARGB1555 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. The parameters @dst, @dst_pitch and * @src refer to arrays. Each array must have at least as many entries as * there are planes in @fb's format. Each entry stores the value for the * format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. 
the destination is at the * top-left corner). * * Drivers can use this function for ARGB1555 devices that don't support * XRGB8888 natively. It sets an opaque alpha channel as part of the conversion. */ void drm_fb_xrgb8888_to_argb1555(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_argb1555_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb1555); static void drm_fb_xrgb8888_to_rgba5551_line(void *dbuf, const void *sbuf, unsigned int pixels) { drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_rgba5551); } /** * drm_fb_xrgb8888_to_rgba5551 - Convert XRGB8888 to RGBA5551 clip buffer * @dst: Array of RGBA5551 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. The parameters @dst, @dst_pitch and * @src refer to arrays. Each array must have at least as many entries as * there are planes in @fb's format. Each entry stores the value for the * format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for RGBA5551 devices that don't support * XRGB8888 natively. It sets an opaque alpha channel as part of the conversion. */ void drm_fb_xrgb8888_to_rgba5551(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_rgba5551_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgba5551); static void drm_fb_xrgb8888_to_rgb888_line(void *dbuf, const void *sbuf, unsigned int pixels) { drm_fb_xfrm_line_32to24(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_rgb888); } /** * drm_fb_xrgb8888_to_rgb888 - Convert XRGB8888 to RGB888 clip buffer * @dst: Array of RGB888 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for RGB888 devices that don't natively * support XRGB8888. 
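 *
 * A minimal usage sketch (illustrative only; the mapped destination, plane
 * state, damage rectangle and conversion state are assumed caller context):
 *
 *	drm_fb_xrgb8888_to_rgb888(dst, NULL, shadow_plane_state->data,
 *				  plane_state->fb, &damage, &fmtcnv_state);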
*/ void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 3, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_rgb888_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888); static void drm_fb_xrgb8888_to_bgr888_line(void *dbuf, const void *sbuf, unsigned int pixels) { drm_fb_xfrm_line_32to24(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_bgr888); } /** * drm_fb_xrgb8888_to_bgr888 - Convert XRGB8888 to BGR888 clip buffer * @dst: Array of BGR888 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for BGR888 devices that don't natively * support XRGB8888. */ void drm_fb_xrgb8888_to_bgr888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 3, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_bgr888_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_bgr888); static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsigned int pixels) { drm_fb_xfrm_line_32to32(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_argb8888); } /** * drm_fb_xrgb8888_to_argb8888 - Convert XRGB8888 to ARGB8888 clip buffer * @dst: Array of ARGB8888 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. The parameters @dst, @dst_pitch and @src refer * to arrays. Each array must have at least as many entries as there are planes in * @fb's format. Each entry stores the value for the format's respective color plane * at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for ARGB8888 devices that don't support XRGB8888 * natively. It sets an opaque alpha channel as part of the conversion. 
*/ void drm_fb_xrgb8888_to_argb8888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_argb8888_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb8888); static void drm_fb_xrgb8888_to_abgr8888_line(void *dbuf, const void *sbuf, unsigned int pixels) { drm_fb_xfrm_line_32to32(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_abgr8888); } static void drm_fb_xrgb8888_to_abgr8888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_abgr8888_line); } static void drm_fb_xrgb8888_to_xbgr8888_line(void *dbuf, const void *sbuf, unsigned int pixels) { drm_fb_xfrm_line_32to32(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_xbgr8888); } static void drm_fb_xrgb8888_to_xbgr8888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_xbgr8888_line); } static void drm_fb_xrgb8888_to_xrgb2101010_line(void *dbuf, const void *sbuf, unsigned int pixels) { drm_fb_xfrm_line_32to32(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_xrgb2101010); } /** * drm_fb_xrgb8888_to_xrgb2101010 - Convert XRGB8888 to XRGB2101010 clip buffer * @dst: Array of XRGB2101010 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for XRGB2101010 devices that don't support XRGB8888 * natively. 
 */
void drm_fb_xrgb8888_to_xrgb2101010(struct iosys_map *dst, const unsigned int *dst_pitch,
				    const struct iosys_map *src, const struct drm_framebuffer *fb,
				    const struct drm_rect *clip, struct drm_format_conv_state *state)
{
	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, };

	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
		    drm_fb_xrgb8888_to_xrgb2101010_line);
}
EXPORT_SYMBOL(drm_fb_xrgb8888_to_xrgb2101010);

static void drm_fb_xrgb8888_to_argb2101010_line(void *dbuf, const void *sbuf, unsigned int pixels)
{
	drm_fb_xfrm_line_32to32(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_argb2101010);
}

/**
 * drm_fb_xrgb8888_to_argb2101010 - Convert XRGB8888 to ARGB2101010 clip buffer
 * @dst: Array of ARGB2101010 destination buffers
 * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines
 *             within @dst; can be NULL if scanlines are stored next to each other.
 * @src: Array of XRGB8888 source buffers
 * @fb: DRM framebuffer
 * @clip: Clip rectangle area to copy
 * @state: Transform and conversion state
 *
 * This function copies parts of a framebuffer to display memory and converts
 * the color format during the process. The parameters @dst, @dst_pitch and
 * @src refer to arrays. Each array must have at least as many entries as
 * there are planes in @fb's format. Each entry stores the value for the
 * format's respective color plane at the same index.
 *
 * This function does not apply clipping on @dst (i.e. the destination is at the
 * top-left corner).
 *
 * Drivers can use this function for ARGB2101010 devices that don't support XRGB8888
 * natively.
 */
void drm_fb_xrgb8888_to_argb2101010(struct iosys_map *dst, const unsigned int *dst_pitch,
				    const struct iosys_map *src, const struct drm_framebuffer *fb,
				    const struct drm_rect *clip, struct drm_format_conv_state *state)
{
	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, };

	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
		    drm_fb_xrgb8888_to_argb2101010_line);
}
EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb2101010);

static void drm_fb_xrgb8888_to_gray8_line(void *dbuf, const void *sbuf, unsigned int pixels)
{
	drm_fb_xfrm_line_32to8(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_r8_bt601);
}

/**
 * drm_fb_xrgb8888_to_gray8 - Convert XRGB8888 to grayscale
 * @dst: Array of 8-bit grayscale destination buffers
 * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines
 *             within @dst; can be NULL if scanlines are stored next to each other.
 * @src: Array of XRGB8888 source buffers
 * @fb: DRM framebuffer
 * @clip: Clip rectangle area to copy
 * @state: Transform and conversion state
 *
 * This function copies parts of a framebuffer to display memory and converts the
 * color format during the process. Destination and framebuffer formats must match. The
 * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at
 * least as many entries as there are planes in @fb's format. Each entry stores the
 * value for the format's respective color plane at the same index.
 *
 * This function does not apply clipping on @dst (i.e. the destination is at the
 * top-left corner).
 *
 * DRM doesn't have native monochrome or grayscale support. Drivers can use this
 * function for grayscale devices that don't support XRGB8888 natively. Such
 * drivers can announce the commonly supported XR24 format to userspace and use
 * this function to convert to the native format. Monochrome drivers will use the
 * most significant bit, where 1 means foreground color and 0 background color.
* ITU BT.601 is being used for the RGB -> luma (brightness) conversion. */ void drm_fb_xrgb8888_to_gray8(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 1, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_gray8_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray8); static void drm_fb_argb8888_to_argb4444_line(void *dbuf, const void *sbuf, unsigned int pixels) { drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_pixel_argb8888_to_argb4444); } /** * drm_fb_argb8888_to_argb4444 - Convert ARGB8888 to ARGB4444 clip buffer * @dst: Array of ARGB4444 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of ARGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. The parameters @dst, @dst_pitch and * @src refer to arrays. Each array must have at least as many entries as * there are planes in @fb's format. Each entry stores the value for the * format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for ARGB4444 devices that don't support * ARGB8888 natively. */ void drm_fb_argb8888_to_argb4444(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_argb8888_to_argb4444_line); } EXPORT_SYMBOL(drm_fb_argb8888_to_argb4444); /** * drm_fb_blit - Copy parts of a framebuffer to display memory * @dst: Array of display-memory addresses to copy to * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @dst_format: FOURCC code of the display's color format * @src: The framebuffer memory to copy from * @fb: The framebuffer to copy from * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory. If the * formats of the display and the framebuffer mismatch, the blit function * will attempt to convert between them during the process. The parameters @dst, * @dst_pitch and @src refer to arrays. Each array must have at least as many * entries as there are planes in @dst_format's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Returns: * 0 on success, or * -EINVAL if the color-format conversion failed, or * a negative error code otherwise. 
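 *
 * A minimal usage sketch (illustrative only; the RGB565 destination format,
 * mapped destination, plane state, damage rectangle and conversion state are
 * assumed caller context):
 *
 *	ret = drm_fb_blit(dst, NULL, DRM_FORMAT_RGB565, shadow_plane_state->data,
 *			  plane_state->fb, &damage, &fmtcnv_state);
 *	if (ret)
 *		return ret;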
*/ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t dst_format, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { uint32_t fb_format = fb->format->format; if (fb_format == dst_format) { drm_fb_memcpy(dst, dst_pitch, src, fb, clip); return 0; } else if (fb_format == (dst_format | DRM_FORMAT_BIG_ENDIAN)) { drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; } else if (fb_format == (dst_format & ~DRM_FORMAT_BIG_ENDIAN)) { drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; } else if (fb_format == DRM_FORMAT_XRGB8888) { if (dst_format == DRM_FORMAT_RGB565) { drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, clip, state, false); return 0; } else if (dst_format == DRM_FORMAT_XRGB1555) { drm_fb_xrgb8888_to_xrgb1555(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ARGB1555) { drm_fb_xrgb8888_to_argb1555(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_RGBA5551) { drm_fb_xrgb8888_to_rgba5551(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_RGB888) { drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_BGR888) { drm_fb_xrgb8888_to_bgr888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ARGB8888) { drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_XBGR8888) { drm_fb_xrgb8888_to_xbgr8888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ABGR8888) { drm_fb_xrgb8888_to_abgr8888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_XRGB2101010) { drm_fb_xrgb8888_to_xrgb2101010(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ARGB2101010) { drm_fb_xrgb8888_to_argb2101010(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_BGRX8888) { drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; } } drm_warn_once(fb->dev, "No conversion helper from %p4cc to %p4cc found.\n", &fb_format, &dst_format); return -EINVAL; } EXPORT_SYMBOL(drm_fb_blit); static void drm_fb_gray8_to_mono_line(void *dbuf, const void *sbuf, unsigned int pixels) { u8 *dbuf8 = dbuf; const u8 *sbuf8 = sbuf; while (pixels) { unsigned int i, bits = min(pixels, 8U); u8 byte = 0; for (i = 0; i < bits; i++, pixels--) { if (*sbuf8++ >= 128) byte |= BIT(i); } *dbuf8++ = byte; } } /** * drm_fb_xrgb8888_to_mono - Convert XRGB8888 to monochrome * @dst: Array of monochrome destination buffers (0=black, 1=white) * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). 
The first pixel (upper left corner of the clip rectangle) will * be converted and copied to the first bit (LSB) in the first byte of the monochrome * destination buffer. If the caller requires that the first pixel in a byte must * be located at an x-coordinate that is a multiple of 8, then the caller must take * care itself of supplying a suitable clip rectangle. * * DRM doesn't have native monochrome support. Drivers can use this function for * monochrome devices that don't support XRGB8888 natively. Such drivers can * announce the commonly supported XR24 format to userspace and use this function * to convert to the native format. * * This function uses drm_fb_xrgb8888_to_gray8() to convert to grayscale and * then the result is converted from grayscale to monochrome. */ void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = { 0, 0, 0, 0 }; unsigned int linepixels = drm_rect_width(clip); unsigned int lines = drm_rect_height(clip); unsigned int cpp = fb->format->cpp[0]; unsigned int len_src32 = linepixels * cpp; struct drm_device *dev = fb->dev; void *vaddr = src[0].vaddr; unsigned int dst_pitch_0; unsigned int y; u8 *mono = dst[0].vaddr, *gray8; u32 *src32; if (drm_WARN_ON(dev, fb->format->format != DRM_FORMAT_XRGB8888)) return; if (!dst_pitch) dst_pitch = default_dst_pitch; dst_pitch_0 = dst_pitch[0]; /* * The mono destination buffer contains 1 bit per pixel */ if (!dst_pitch_0) dst_pitch_0 = DIV_ROUND_UP(linepixels, 8); /* * The dma memory is write-combined so reads are uncached. * Speed up by fetching one line at a time. * * Also, format conversion from XR24 to monochrome are done * line-by-line but are converted to 8-bit grayscale as an * intermediate step. * * Allocate a buffer to be used for both copying from the cma * memory and to store the intermediate grayscale line pixels. 
 */
	src32 = drm_format_conv_state_reserve(state, len_src32 + linepixels, GFP_KERNEL);
	if (!src32)
		return;

	gray8 = (u8 *)src32 + len_src32;

	vaddr += clip_offset(clip, fb->pitches[0], cpp);
	for (y = 0; y < lines; y++) {
		src32 = memcpy(src32, vaddr, len_src32);
		drm_fb_xrgb8888_to_gray8_line(gray8, src32, linepixels);
		drm_fb_gray8_to_mono_line(mono, gray8, linepixels);
		vaddr += fb->pitches[0];
		mono += dst_pitch_0;
	}
}
EXPORT_SYMBOL(drm_fb_xrgb8888_to_mono);

static uint32_t drm_fb_nonalpha_fourcc(uint32_t fourcc)
{
	/* only handle formats with depth != 0 and alpha channel */
	switch (fourcc) {
	case DRM_FORMAT_ARGB1555:
		return DRM_FORMAT_XRGB1555;
	case DRM_FORMAT_ABGR1555:
		return DRM_FORMAT_XBGR1555;
	case DRM_FORMAT_RGBA5551:
		return DRM_FORMAT_RGBX5551;
	case DRM_FORMAT_BGRA5551:
		return DRM_FORMAT_BGRX5551;
	case DRM_FORMAT_ARGB8888:
		return DRM_FORMAT_XRGB8888;
	case DRM_FORMAT_ABGR8888:
		return DRM_FORMAT_XBGR8888;
	case DRM_FORMAT_RGBA8888:
		return DRM_FORMAT_RGBX8888;
	case DRM_FORMAT_BGRA8888:
		return DRM_FORMAT_BGRX8888;
	case DRM_FORMAT_ARGB2101010:
		return DRM_FORMAT_XRGB2101010;
	case DRM_FORMAT_ABGR2101010:
		return DRM_FORMAT_XBGR2101010;
	case DRM_FORMAT_RGBA1010102:
		return DRM_FORMAT_RGBX1010102;
	case DRM_FORMAT_BGRA1010102:
		return DRM_FORMAT_BGRX1010102;
	}

	return fourcc;
}

static bool is_listed_fourcc(const uint32_t *fourccs, size_t nfourccs, uint32_t fourcc)
{
	const uint32_t *fourccs_end = fourccs + nfourccs;

	while (fourccs < fourccs_end) {
		if (*fourccs == fourcc)
			return true;
		++fourccs;
	}
	return false;
}

/**
 * drm_fb_build_fourcc_list - Filters a list of supported color formats against
 *                            the device's native formats
 * @dev: DRM device
 * @native_fourccs: 4CC codes of natively supported color formats
 * @native_nfourccs: The number of entries in @native_fourccs
 * @fourccs_out: Returns 4CC codes of supported color formats
 * @nfourccs_out: The number of available entries in @fourccs_out
 *
 * This function creates a list of supported color formats from natively
 * supported formats and additional emulated formats.
 * At a minimum, most userspace programs expect at least support for
 * XRGB8888 on the primary plane. Devices that have to emulate the
 * format, and possibly others, can use drm_fb_build_fourcc_list() to
 * create a list of supported color formats. The returned list can
 * be handed over to drm_universal_plane_init() et al. Native formats
 * will go before emulated formats. Native formats with an alpha channel
 * will be replaced by the equivalent formats without alpha, as primary
 * planes usually don't support alpha. Other heuristics might be applied
 * to optimize the order. Formats near the beginning of the list are
 * usually preferred over formats near the end of the list.
 *
 * Returns:
 * The number of color-format 4CC codes returned in @fourccs_out.
 */
size_t drm_fb_build_fourcc_list(struct drm_device *dev,
				const u32 *native_fourccs, size_t native_nfourccs,
				u32 *fourccs_out, size_t nfourccs_out)
{
	/*
	 * XRGB8888 is the default fallback format for most of userspace
	 * and it's currently the only format that should be emulated for
	 * the primary plane. Only if there's ever another default fallback,
	 * it should be added here.
	 */
	static const uint32_t extra_fourccs[] = {
		DRM_FORMAT_XRGB8888,
	};
	static const size_t extra_nfourccs = ARRAY_SIZE(extra_fourccs);

	u32 *fourccs = fourccs_out;
	const u32 *fourccs_end = fourccs_out + nfourccs_out;
	size_t i;

	/*
	 * The device's native formats go first.
	 */
	for (i = 0; i < native_nfourccs; ++i) {
		/*
		 * Several DTs, boot loaders and firmware report native
		 * alpha formats that are non-alpha formats instead. So
		 * replace alpha formats by non-alpha formats.
		 */
		u32 fourcc = drm_fb_nonalpha_fourcc(native_fourccs[i]);

		if (is_listed_fourcc(fourccs_out, fourccs - fourccs_out, fourcc)) {
			continue; /* skip duplicate entries */
		} else if (fourccs == fourccs_end) {
			drm_warn(dev, "Ignoring native format %p4cc\n", &fourcc);
			continue; /* end of available output buffer */
		}

		drm_dbg_kms(dev, "adding native format %p4cc\n", &fourcc);

		*fourccs = fourcc;
		++fourccs;
	}

	/*
	 * The extra formats, emulated by the driver, go second.
	 */
	for (i = 0; (i < extra_nfourccs) && (fourccs < fourccs_end); ++i) {
		u32 fourcc = extra_fourccs[i];

		if (is_listed_fourcc(fourccs_out, fourccs - fourccs_out, fourcc)) {
			continue; /* skip duplicate and native entries */
		} else if (fourccs == fourccs_end) {
			drm_warn(dev, "Ignoring emulated format %p4cc\n", &fourcc);
			continue; /* end of available output buffer */
		}

		drm_dbg_kms(dev, "adding emulated format %p4cc\n", &fourcc);

		*fourccs = fourcc;
		++fourccs;
	}

	return fourccs - fourccs_out;
}
EXPORT_SYMBOL(drm_fb_build_fourcc_list);
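/*
 * A minimal usage sketch for drm_fb_build_fourcc_list() (illustrative only;
 * the native format list, plane, funcs and modifiers are assumptions about
 * the calling driver, not part of this file):
 *
 *	static const u32 native_formats[] = { DRM_FORMAT_RGB565 };
 *	u32 formats[8];
 *	size_t nformats;
 *
 *	nformats = drm_fb_build_fourcc_list(dev, native_formats,
 *					    ARRAY_SIZE(native_formats),
 *					    formats, ARRAY_SIZE(formats));
 *	ret = drm_universal_plane_init(dev, plane, 0, &funcs,
 *				       formats, nformats, modifiers,
 *				       DRM_PLANE_TYPE_PRIMARY, NULL);
 */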
4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 
4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 
5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 
6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 
6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 
7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 
8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751 8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 
9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 9459 9460 9461 9462 9463 9464 9465 9466 9467 9468 9469 9470 9471 9472 9473 9474 9475 9476 9477 9478 9479 9480 9481 9482 9483 9484 9485 9486 9487 9488 9489 9490 9491 9492 9493 9494 9495 9496 9497 9498 9499 9500 9501 9502 9503 9504 9505 9506 9507 9508 9509 9510 9511 9512 9513 9514 9515 9516 9517 9518 9519 9520 9521 9522 9523 9524 9525 9526 9527 9528 9529 9530 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542 9543 9544 9545 9546 9547 9548 9549 9550 9551 9552 9553 9554 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600 9601 9602 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614 9615 9616 9617 9618 9619 9620 9621 9622 9623 9624 9625 9626 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638 9639 9640 9641 9642 9643 9644 9645 9646 9647 9648 9649 9650 9651 9652 9653 9654 9655 9656 9657 9658 9659 9660 9661 9662 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674 9675 9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738 9739 9740 9741 9742 9743 9744 9745 9746 9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 
9767 9768 9769 9770 9771 9772 9773 9774 9775 9776 9777 9778 9779 9780 9781 9782 9783 9784 9785 9786 9787 9788 9789 9790 9791 9792 9793 9794 9795 9796 9797 9798 9799 9800 9801 9802 9803 9804 9805 9806 9807 9808 9809 9810 9811 9812 9813 9814 9815 9816 9817 9818 9819 9820 9821 9822 9823 9824 9825 9826 9827 9828 9829 9830 9831 9832 9833 9834 9835 9836 9837 9838 9839 9840 9841 9842 9843 9844 9845 9846 9847 9848 9849 9850 9851 9852 9853 9854 9855 9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 9866 9867 9868 9869 9870 9871 9872 9873 9874 9875 9876 9877 9878 9879 9880 9881 9882 9883 9884 9885 9886 9887 9888 9889 9890 9891 9892 9893 9894 9895 9896 9897 9898 9899 9900 9901 9902 9903 9904 9905 9906 9907 9908 9909 9910 9911 9912 9913 9914 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 9926 9927 9928 9929 9930 9931 9932 9933 9934 9935 9936 9937 9938 9939 9940 9941 9942 9943 9944 9945 9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960 9961 9962 9963 9964 9965 9966 9967 9968 9969 9970 9971 9972 9973 9974 9975 9976 9977 9978 9979 9980 9981 9982 9983 9984 9985 9986 9987 9988 9989 9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 10000 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023 10024 10025 10026 10027 10028 10029 10030 10031 10032 10033 10034 10035 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046 10047 10048 10049 10050 10051 10052 10053 10054 10055 10056 10057 10058 10059 10060 10061 10062 10063 10064 10065 10066 10067 10068 10069 10070 10071 10072 10073 10074 10075 10076 10077 10078 10079 10080 10081 10082 10083 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094 10095 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154 10155 10156 10157 10158 10159 10160 10161 10162 10163 10164 10165 10166 10167 10168 10169 10170 10171 10172 10173 10174 10175 10176 10177 10178 10179 10180 10181 10182 10183 10184 10185 10186 10187 10188 10189 10190 10191 10192 10193 10194 10195 10196 10197 10198 10199 10200 10201 10202 10203 10204 10205 10206 10207 10208 10209 10210 10211 10212 10213 10214 10215 10216 10217 10218 10219 10220 10221 10222 10223 10224 10225 10226 10227 10228 10229 10230 10231 10232 10233 10234 10235 10236 10237 10238 10239 10240 10241 10242 10243 10244 10245 10246 10247 10248 10249 10250 10251 10252 10253 10254 10255 10256 10257 10258 10259 10260 10261 10262 10263 10264 10265 10266 10267 10268 10269 10270 10271 10272 10273 10274 10275 10276 10277 10278 10279 10280 10281 10282 10283 10284 10285 10286 10287 10288 10289 10290 10291 10292 10293 10294 10295 10296 10297 10298 10299 10300 10301 10302 10303 10304 10305 10306 10307 10308 10309 10310 10311 10312 10313 10314 10315 10316 10317 10318 10319 10320 10321 10322 10323 10324 10325 10326 10327 10328 10329 10330 10331 10332 10333 10334 10335 10336 10337 10338 10339 10340 10341 10342 10343 10344 10345 10346 10347 10348 10349 10350 10351 10352 10353 10354 10355 10356 10357 10358 10359 10360 10361 10362 10363 10364 10365 10366 10367 10368 10369 10370 10371 10372 10373 10374 10375 10376 10377 10378 10379 10380 10381 10382 10383 10384 10385 10386 10387 10388 10389 10390 10391 10392 10393 10394 10395 10396 10397 
10398 10399 10400 10401 10402 10403 10404 10405 10406 10407 10408 10409 10410 10411 10412 10413 10414 10415 10416 10417 10418 10419 10420 10421 10422 10423 10424 10425 10426 10427 10428 10429 10430 10431 10432 10433 10434 10435 10436 10437 10438 10439 10440 10441 10442 10443 10444 10445 10446 10447 10448 10449 10450 10451 10452 10453 10454 10455 10456 10457 10458 10459 10460 10461 10462 10463 10464 10465 10466 10467 10468 10469 10470 10471 10472 10473 10474 10475 10476 10477 10478 10479 10480 10481 10482 10483 10484 10485 10486 10487 10488 10489 10490 10491 10492 10493 10494 10495 10496 10497 10498 10499 10500 10501 10502 10503 10504 10505 10506 10507 10508 10509 10510 10511 10512 10513 10514 10515 10516 10517 10518 10519 10520 10521 10522 10523 10524 10525 10526 10527 10528 10529 10530 10531 10532 10533 10534 10535 10536 10537 10538 10539 10540 10541 10542 10543 10544 10545 10546 10547 10548 10549 10550 10551 10552 10553 10554 10555 10556 10557 10558 10559 10560 10561 10562 10563 10564 10565 10566 10567 10568 10569 10570 10571 10572 10573 10574 10575 10576 10577 10578 10579 10580 10581 10582 10583 10584 10585 10586 10587 10588 10589 10590 10591 10592 10593 10594 10595 10596 10597 10598 10599 10600 10601 10602 10603 10604 10605 10606 10607 10608 10609 10610 10611 10612 10613 10614 10615 10616 10617 10618 10619 10620 10621 10622 10623 10624 10625 10626 10627 10628 10629 10630 10631 10632 10633 10634 10635 10636 10637 10638 10639 10640 10641 10642 10643 10644 10645 10646 10647 10648 10649 10650 10651 10652 10653 10654 10655 10656 10657 10658 10659 10660 10661 10662 10663 10664 10665 10666 10667 10668 10669 10670 10671 10672 10673 10674 10675 10676 10677 10678 10679 10680 10681 10682 10683 10684 10685 10686 10687 10688 10689 10690 10691 10692 10693 10694 10695 10696 10697 10698 10699 10700 10701 10702 10703 10704 10705 10706 10707 10708 10709 10710 10711 10712 10713 10714 10715 10716 10717 10718 10719 10720 10721 10722 10723 10724 10725 10726 10727 10728 10729 10730 10731 10732 10733 10734 10735 10736 10737 10738 10739 10740 10741 10742 10743 10744 10745 10746 10747 10748 10749 10750 10751 10752 10753 10754 10755 10756 10757 10758 10759 10760 10761 10762 10763 10764 10765 10766 10767 10768 10769 10770 10771 10772 10773 10774 10775 10776 10777 10778 10779 10780 10781 10782 10783 10784 10785 10786 10787 10788 10789 10790 10791 10792 10793 10794 10795 10796 10797 10798 10799 10800 10801 10802 10803 10804 10805 10806 10807 10808 10809 10810 10811 10812 10813 10814 10815 10816 10817 10818 10819 10820 10821 10822 10823 10824 10825 10826 10827 10828 10829 10830 10831 10832 10833 10834 10835 10836 10837 10838 10839 10840 10841 10842 10843 10844 10845 10846 10847 10848 10849 10850 10851 10852 10853 10854 10855 10856 10857 10858 10859 10860 10861 10862 10863 10864 10865 10866 10867 10868 10869 10870 10871 10872 10873 10874 10875 10876 10877 10878 10879 10880 10881 10882 10883 10884 10885 10886 10887 10888 10889 10890 10891 10892 10893 10894 10895 10896 10897 10898 10899 10900 10901 10902 10903 10904 10905 10906 10907 10908 10909 10910 10911 10912 10913 10914 10915 10916 10917 10918 10919 10920 10921 10922 10923 10924 10925 10926 10927 10928 10929 10930 10931 10932 10933 10934 10935 10936 10937 10938 10939 10940 10941 10942 10943 10944 10945 10946 10947 10948 10949 10950 10951 10952 10953 10954 10955 10956 10957 10958 10959 10960 10961 10962 10963 10964 10965 10966 10967 10968 10969 10970 10971 10972 10973 10974 10975 10976 10977 10978 10979 10980 10981 10982 10983 10984 10985 10986 10987 10988 10989 
10990 10991 10992 10993 10994 10995 10996 10997 10998 10999 11000 11001 11002 11003 11004 11005 11006 11007 11008 11009 11010 11011 11012 11013 11014 11015 11016 11017 11018 11019 11020 11021 11022 11023 11024 11025 11026 11027 11028 11029 11030 11031 11032 11033 11034 11035 11036 11037 11038 11039 11040 11041 11042 11043 11044 11045 11046 11047 11048 11049 11050 11051 11052 11053 11054 11055 11056 11057 11058 11059 11060 11061 11062 11063 11064 11065 11066 11067 11068 11069 11070 11071 11072 11073 11074 11075 11076 11077 11078 11079 11080 11081 11082 11083 11084 11085 11086 11087 11088 11089 11090 11091 11092 11093 11094 11095 11096 11097 11098 11099 11100 11101 11102 11103 11104 11105 11106 11107 11108 11109 11110 11111 11112 11113 11114 11115 11116 11117 11118 11119 11120 11121 11122 11123 11124 11125 11126 11127 11128 11129 11130 11131 11132 11133 11134 11135 11136 11137 11138 11139 11140 11141 11142 11143 11144 11145 11146 11147 11148 11149 11150 11151 11152 11153 11154 11155 11156 11157 11158 11159 11160 11161 11162 11163 11164 11165 11166 11167 11168 11169 11170 11171 11172 11173 11174 11175 11176 11177 11178 11179 11180 11181 11182 11183 11184 11185 11186 11187 11188 11189 11190 11191 11192 11193 11194 11195 11196 11197 11198 11199 11200 11201 11202 11203 11204 11205 11206 11207 11208 11209 11210 11211 11212 11213 11214 11215 11216 11217 11218 11219 11220 11221 11222 11223 11224 11225 11226 11227 11228 11229 11230 11231 11232 11233 11234 11235 11236 11237 11238 11239 11240 11241 11242 11243 11244 11245 11246 11247 11248 11249 11250 11251 11252 11253 11254 11255 11256 11257 11258 11259 11260 11261 11262 11263 11264 11265 11266 11267 11268 11269 11270 11271 11272 11273 11274 11275 11276 11277 11278 11279 11280 11281 11282 11283 11284 11285 11286 11287 11288 11289 11290 11291 11292 11293 11294 11295 11296 11297 11298 11299 11300 11301 11302 11303 11304 11305 11306 11307 11308 11309 11310 11311 11312 11313 11314 11315 11316 11317 11318 11319 11320 11321 11322 11323 11324 11325 11326 11327 11328 11329 11330 11331 11332 11333 11334 11335 11336 11337 11338 11339 11340 11341 11342 11343 11344 11345 11346 11347 11348 11349 11350 11351 11352 11353 11354 11355 11356 11357 11358 11359 11360 11361 11362 11363 11364 11365 11366 11367 11368 11369 11370 11371 11372 11373 11374 11375 11376 11377 11378 11379 11380 11381 11382 11383 11384 11385 11386 11387 11388 11389 11390 11391 11392 11393 11394 11395 11396 11397 11398 11399 11400 11401 11402 11403 11404 11405 11406 11407 11408 11409 11410 11411 11412 11413 11414 11415 11416 11417 11418 11419 11420 11421 11422 11423 11424 11425 11426 11427 11428 11429 11430 11431 11432 11433 11434 11435 11436 11437 11438 11439 11440 11441 11442 11443 11444 11445 11446 11447 11448 11449 11450 11451 11452 11453 11454 11455 11456 11457 11458 11459 11460 11461 11462 11463 11464 11465 11466 11467 11468 11469 11470 11471 11472 11473 11474 11475 11476 11477 11478 11479 11480 11481 11482 11483 11484 11485 11486 11487 11488 11489 11490 11491 11492 11493 11494 11495 11496 11497 11498 11499 11500 11501 11502 11503 11504 11505 11506 11507 11508 11509 11510 11511 11512 11513 11514 11515 11516 11517 11518 11519 11520 11521 11522 11523 11524 11525 11526 11527 11528 11529 11530 11531 11532 11533 11534 11535 11536 11537 11538 11539 11540 11541 11542 11543 11544 11545 11546 11547 11548 11549 11550 11551 11552 11553 11554 11555 11556 11557 11558 11559 11560 11561 11562 11563 11564 11565 11566 11567 11568 11569 11570 11571 11572 11573 11574 11575 11576 11577 11578 11579 11580 11581 
11582 11583 11584 11585 11586 11587 11588 11589 11590 11591 11592 11593 11594 11595 11596 11597 11598 11599 11600 11601 11602 11603 11604 11605 11606 11607 11608 11609 11610 11611 11612 11613 11614 11615 11616 11617 11618 11619 11620 11621 11622 11623 11624 11625 11626 11627 11628 11629 11630 11631 11632 11633 11634 11635 11636 11637 11638 11639 11640 11641 11642 11643 11644 11645 11646 11647 11648 11649 11650 11651 11652 11653 11654 11655 11656 11657 11658 11659 11660 11661 11662 11663 11664 11665 11666 11667 11668 11669 11670 11671 11672 11673 11674 11675 11676 11677 11678 11679 11680 11681 11682 11683 11684 11685 11686 11687 11688 11689 11690 11691 11692 11693 11694 11695 11696 11697 11698 11699 11700 11701 11702 11703 11704 11705 11706 11707 11708 11709 11710 11711 11712 11713 11714 11715 11716 11717 11718 11719 11720 11721 11722 11723 11724 11725 11726 11727 11728 11729 11730 11731 11732 11733 11734 11735 11736 11737 11738 11739 11740 11741 11742 11743 11744 11745 11746 11747 11748 11749 11750 11751 11752 11753 11754 11755 11756 11757 11758 11759 11760 11761 11762 11763 11764 11765 11766 11767 11768 11769 11770 11771 11772 11773 11774 11775 11776 11777 11778 11779 11780 11781 11782 11783 11784 11785 11786 11787 11788 11789 11790 11791 11792 11793 11794 11795 11796 11797 11798 11799 11800 11801 11802 11803 11804 11805 11806 11807 11808 11809 11810 11811 11812 11813 11814 11815 11816 11817 11818 11819 11820 11821 11822 11823 11824 11825 11826 11827 11828 11829 11830 11831 11832 11833 11834 11835 11836 11837 11838 11839 11840 11841 11842 11843 11844 11845 11846 11847 11848 11849 11850 11851 11852 11853 11854 11855 11856 11857 11858 11859 11860 11861 11862 11863 11864 11865 11866 11867 11868 11869 11870 11871 11872 11873 11874 11875 11876 11877 11878 11879 11880 11881 11882 11883 11884 11885 11886 11887 11888 11889 11890 11891 11892 11893 11894 11895 11896 11897 11898 11899 11900 11901 11902 11903 11904 11905 11906 11907 11908 11909 11910 11911 11912 11913 11914 11915 11916 11917 11918 11919 11920 11921 11922 11923 11924 11925 11926 11927 11928 11929 11930 11931 11932 11933 11934 11935 11936 11937 11938 11939 11940 11941 11942 11943 11944 11945 11946 11947 11948 11949 11950 11951 11952 11953 11954 11955 11956 11957 11958 11959 11960 11961 11962 11963 11964 11965 11966 11967 11968 11969 11970 11971 11972 11973 11974 11975 11976 11977 11978 11979 11980 11981 11982 11983 11984 11985 11986 11987 11988 11989 11990 11991 11992 11993 11994 11995 11996 11997 11998 11999 12000 12001 12002 12003 12004 12005 12006 12007 12008 12009 12010 12011 12012 12013 12014 12015 12016 12017 12018 12019 12020 12021 12022 12023 12024 12025 12026 12027 12028 12029 12030 12031 12032 12033 12034 12035 12036 12037 12038 12039 12040 12041 12042 12043 12044 12045 12046 12047 12048 12049 12050 12051 12052 12053 12054 12055 12056 12057 12058 12059 12060 12061 12062 12063 12064 12065 12066 12067 12068 12069 12070 12071 12072 12073 12074 12075 12076 12077 12078 12079 12080 12081 12082 12083 12084 12085 12086 12087 12088 12089 12090 12091 12092 12093 12094 12095 12096 12097 12098 12099 12100 12101 12102 12103 12104 12105 12106 12107 12108 12109 12110 12111 12112 12113 12114 12115 12116 12117 12118 12119 12120 12121 12122 12123 12124 12125 12126 12127 12128 12129 12130 12131 12132 12133 12134 12135 12136 12137 12138 12139 12140 12141 12142 12143 12144 12145 12146 12147 12148 12149 12150 12151 12152 12153 12154 12155 12156 12157 12158 12159 12160 12161 12162 12163 12164 12165 12166 12167 12168 12169 12170 12171 12172 12173 
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2007-2009 Patrick McHardy <kaber@trash.net> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/module.h> #include <linux/init.h> #include <linux/list.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/vmalloc.h> #include <linux/rhashtable.h> #include <linux/audit.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_offload.h> #include <net/net_namespace.h> #include <net/sock.h> #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) #define NFT_SET_MAX_ANONLEN 16 /* limit compaction to avoid huge kmalloc/krealloc sizes. */ #define NFT_MAX_SET_NELEMS ((2048 - sizeof(struct nft_trans_elem)) / sizeof(struct nft_trans_one_elem)) unsigned int nf_tables_net_id __read_mostly; static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); static LIST_HEAD(nf_tables_gc_list); static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); static DEFINE_SPINLOCK(nf_tables_gc_list_lock); enum { NFT_VALIDATE_SKIP = 0, NFT_VALIDATE_NEED, NFT_VALIDATE_DO, }; static struct rhltable nft_objname_ht; static u32 nft_chain_hash(const void *data, u32 len, u32 seed); static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed); static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *); static u32 nft_objname_hash(const void *data, u32 len, u32 seed); static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed); static int nft_objname_hash_cmp(struct rhashtable_compare_arg *, const void *); static const struct rhashtable_params nft_chain_ht_params = { .head_offset = offsetof(struct nft_chain, rhlhead), .key_offset = offsetof(struct nft_chain, name), .hashfn = nft_chain_hash, .obj_hashfn = nft_chain_hash_obj, .obj_cmpfn = nft_chain_hash_cmp, .automatic_shrinking = true, }; static const struct rhashtable_params nft_objname_ht_params = { .head_offset = offsetof(struct nft_object, rhlhead), .key_offset = offsetof(struct nft_object, key), .hashfn = nft_objname_hash, .obj_hashfn = nft_objname_hash_obj, .obj_cmpfn = nft_objname_hash_cmp, .automatic_shrinking = true, }; struct nft_audit_data { struct nft_table *table; int entries; int op; struct list_head list; }; static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types [NFT_MSG_NEWTABLE] = AUDIT_NFT_OP_TABLE_REGISTER, [NFT_MSG_GETTABLE] = AUDIT_NFT_OP_INVALID, [NFT_MSG_DELTABLE] = AUDIT_NFT_OP_TABLE_UNREGISTER, [NFT_MSG_NEWCHAIN] = AUDIT_NFT_OP_CHAIN_REGISTER, [NFT_MSG_GETCHAIN] = AUDIT_NFT_OP_INVALID, [NFT_MSG_DELCHAIN] =
AUDIT_NFT_OP_CHAIN_UNREGISTER, [NFT_MSG_NEWRULE] = AUDIT_NFT_OP_RULE_REGISTER, [NFT_MSG_GETRULE] = AUDIT_NFT_OP_INVALID, [NFT_MSG_DELRULE] = AUDIT_NFT_OP_RULE_UNREGISTER, [NFT_MSG_NEWSET] = AUDIT_NFT_OP_SET_REGISTER, [NFT_MSG_GETSET] = AUDIT_NFT_OP_INVALID, [NFT_MSG_DELSET] = AUDIT_NFT_OP_SET_UNREGISTER, [NFT_MSG_NEWSETELEM] = AUDIT_NFT_OP_SETELEM_REGISTER, [NFT_MSG_GETSETELEM] = AUDIT_NFT_OP_INVALID, [NFT_MSG_DELSETELEM] = AUDIT_NFT_OP_SETELEM_UNREGISTER, [NFT_MSG_NEWGEN] = AUDIT_NFT_OP_GEN_REGISTER, [NFT_MSG_GETGEN] = AUDIT_NFT_OP_INVALID, [NFT_MSG_TRACE] = AUDIT_NFT_OP_INVALID, [NFT_MSG_NEWOBJ] = AUDIT_NFT_OP_OBJ_REGISTER, [NFT_MSG_GETOBJ] = AUDIT_NFT_OP_INVALID, [NFT_MSG_DELOBJ] = AUDIT_NFT_OP_OBJ_UNREGISTER, [NFT_MSG_GETOBJ_RESET] = AUDIT_NFT_OP_OBJ_RESET, [NFT_MSG_NEWFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_REGISTER, [NFT_MSG_GETFLOWTABLE] = AUDIT_NFT_OP_INVALID, [NFT_MSG_DELFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, [NFT_MSG_GETSETELEM_RESET] = AUDIT_NFT_OP_SETELEM_RESET, }; static void nft_validate_state_update(struct nft_table *table, u8 new_validate_state) { switch (table->validate_state) { case NFT_VALIDATE_SKIP: WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO); break; case NFT_VALIDATE_NEED: break; case NFT_VALIDATE_DO: if (new_validate_state == NFT_VALIDATE_NEED) return; } table->validate_state = new_validate_state; } static void nf_tables_trans_destroy_work(struct work_struct *w); static void nft_trans_gc_work(struct work_struct *work); static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); static void nft_ctx_init(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, u8 family, struct nft_table *table, struct nft_chain *chain, const struct nlattr * const *nla) { ctx->net = net; ctx->family = family; ctx->level = 0; ctx->table = table; ctx->chain = chain; ctx->nla = nla; ctx->portid = NETLINK_CB(skb).portid; ctx->report = nlmsg_report(nlh); ctx->flags = nlh->nlmsg_flags; ctx->seq = nlh->nlmsg_seq; bitmap_zero(ctx->reg_inited, NFT_REG32_NUM); } static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, int msg_type, u32 size, gfp_t gfp) { struct nft_trans *trans; trans = kzalloc(size, gfp); if (trans == NULL) return NULL; INIT_LIST_HEAD(&trans->list); trans->msg_type = msg_type; trans->net = ctx->net; trans->table = ctx->table; trans->seq = ctx->seq; trans->flags = ctx->flags; trans->report = ctx->report; return trans; } static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, int msg_type, u32 size) { return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } static struct nft_trans_binding *nft_trans_get_binding(struct nft_trans *trans) { switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: case NFT_MSG_NEWSET: return container_of(trans, struct nft_trans_binding, nft_trans); } return NULL; } static void nft_trans_list_del(struct nft_trans *trans) { struct nft_trans_binding *trans_binding; list_del(&trans->list); trans_binding = nft_trans_get_binding(trans); if (trans_binding) list_del(&trans_binding->binding_list); } static void nft_trans_destroy(struct nft_trans *trans) { nft_trans_list_del(trans); kfree(trans); } static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set, bool bind) { struct nftables_pernet *nft_net; struct net *net = ctx->net; struct nft_trans *trans; if (!nft_set_is_anonymous(set)) return; nft_net = nft_pernet(net); list_for_each_entry_reverse(trans, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWSET: if (nft_trans_set(trans) == set) 
nft_trans_set_bound(trans) = bind; break; case NFT_MSG_NEWSETELEM: if (nft_trans_elem_set(trans) == set) nft_trans_elem_set_bound(trans) = bind; break; } } } static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) { return __nft_set_trans_bind(ctx, set, true); } static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set) { return __nft_set_trans_bind(ctx, set, false); } static void __nft_chain_trans_bind(const struct nft_ctx *ctx, struct nft_chain *chain, bool bind) { struct nftables_pernet *nft_net; struct net *net = ctx->net; struct nft_trans *trans; if (!nft_chain_binding(chain)) return; nft_net = nft_pernet(net); list_for_each_entry_reverse(trans, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: if (nft_trans_chain(trans) == chain) nft_trans_chain_bound(trans) = bind; break; case NFT_MSG_NEWRULE: if (nft_trans_rule_chain(trans) == chain) nft_trans_rule_bound(trans) = bind; break; } } } static void nft_chain_trans_bind(const struct nft_ctx *ctx, struct nft_chain *chain) { __nft_chain_trans_bind(ctx, chain, true); } int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) { if (!nft_chain_binding(chain)) return 0; if (nft_chain_binding(ctx->chain)) return -EOPNOTSUPP; if (chain->bound) return -EBUSY; if (!nft_use_inc(&chain->use)) return -EMFILE; chain->bound = true; nft_chain_trans_bind(ctx, chain); return 0; } void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) { __nft_chain_trans_bind(ctx, chain, false); } static int nft_netdev_register_hooks(struct net *net, struct list_head *hook_list) { struct nf_hook_ops *ops; struct nft_hook *hook; int err, j; j = 0; list_for_each_entry(hook, hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) { err = nf_register_net_hook(net, ops); if (err < 0) goto err_register; j++; } } return 0; err_register: list_for_each_entry(hook, hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) { if (j-- <= 0) break; nf_unregister_net_hook(net, ops); } } return err; } static void nft_netdev_hook_free_ops(struct nft_hook *hook) { struct nf_hook_ops *ops, *next; list_for_each_entry_safe(ops, next, &hook->ops_list, list) { list_del(&ops->list); kfree(ops); } } static void nft_netdev_hook_free(struct nft_hook *hook) { nft_netdev_hook_free_ops(hook); kfree(hook); } static void __nft_netdev_hook_free_rcu(struct rcu_head *rcu) { struct nft_hook *hook = container_of(rcu, struct nft_hook, rcu); nft_netdev_hook_free(hook); } static void nft_netdev_hook_free_rcu(struct nft_hook *hook) { call_rcu(&hook->rcu, __nft_netdev_hook_free_rcu); } static void nft_netdev_unregister_hooks(struct net *net, struct list_head *hook_list, bool release_netdev) { struct nft_hook *hook, *next; struct nf_hook_ops *ops; list_for_each_entry_safe(hook, next, hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) nf_unregister_net_hook(net, ops); if (release_netdev) { list_del(&hook->list); nft_netdev_hook_free_rcu(hook); } } } static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) { struct nft_base_chain *basechain; const struct nf_hook_ops *ops; if (table->flags & NFT_TABLE_F_DORMANT || !nft_is_base_chain(chain)) return 0; basechain = nft_base_chain(chain); ops = &basechain->ops; if (basechain->type->ops_register) return basechain->type->ops_register(net, ops); if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) return nft_netdev_register_hooks(net, &basechain->hook_list); 
return nf_register_net_hook(net, &basechain->ops); } static void __nf_tables_unregister_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain, bool release_netdev) { struct nft_base_chain *basechain; const struct nf_hook_ops *ops; if (table->flags & NFT_TABLE_F_DORMANT || !nft_is_base_chain(chain)) return; basechain = nft_base_chain(chain); ops = &basechain->ops; if (basechain->type->ops_unregister) return basechain->type->ops_unregister(net, ops); if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) nft_netdev_unregister_hooks(net, &basechain->hook_list, release_netdev); else nf_unregister_net_hook(net, &basechain->ops); } static void nf_tables_unregister_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) { return __nf_tables_unregister_hook(net, table, chain, false); } static bool nft_trans_collapse_set_elem_allowed(const struct nft_trans_elem *a, const struct nft_trans_elem *b) { /* NB: the ->bound equality check is defensive, at this time we only merge * a new nft_trans_elem transaction request with the transaction tail * element, but a->bound != b->bound would imply a NEWRULE transaction * is queued in-between. * * The set check is mandatory, the NFT_MAX_SET_NELEMS check prevents * huge krealloc() requests. */ return a->set == b->set && a->bound == b->bound && a->nelems < NFT_MAX_SET_NELEMS; } static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net, struct nft_trans_elem *tail, struct nft_trans_elem *trans, gfp_t gfp) { unsigned int nelems, old_nelems = tail->nelems; struct nft_trans_elem *new_trans; if (!nft_trans_collapse_set_elem_allowed(tail, trans)) return false; /* "cannot happen", at this time userspace element add * requests always allocate a new transaction element. * * This serves as a reminder to adjust the list_add_tail * logic below in case this ever changes. */ if (WARN_ON_ONCE(trans->nelems != 1)) return false; if (check_add_overflow(old_nelems, trans->nelems, &nelems)) return false; /* krealloc might free tail which invalidates list pointers */ list_del_init(&tail->nft_trans.list); new_trans = krealloc(tail, struct_size(tail, elems, nelems), gfp); if (!new_trans) { list_add_tail(&tail->nft_trans.list, &nft_net->commit_list); return false; } /* * new_trans->nft_trans.list contains garbage, but * list_add_tail() doesn't care. 
*/ new_trans->nelems = nelems; new_trans->elems[old_nelems] = trans->elems[0]; list_add_tail(&new_trans->nft_trans.list, &nft_net->commit_list); return true; } static bool nft_trans_try_collapse(struct nftables_pernet *nft_net, struct nft_trans *trans, gfp_t gfp) { struct nft_trans *tail; if (list_empty(&nft_net->commit_list)) return false; tail = list_last_entry(&nft_net->commit_list, struct nft_trans, list); if (tail->msg_type != trans->msg_type) return false; switch (trans->msg_type) { case NFT_MSG_NEWSETELEM: case NFT_MSG_DELSETELEM: return nft_trans_collapse_set_elem(nft_net, nft_trans_container_elem(tail), nft_trans_container_elem(trans), gfp); } return false; } static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans_binding *binding; struct nft_trans_set *trans_set; list_add_tail(&trans->list, &nft_net->commit_list); binding = nft_trans_get_binding(trans); if (!binding) return; switch (trans->msg_type) { case NFT_MSG_NEWSET: trans_set = nft_trans_container_set(trans); if (!nft_trans_set_update(trans) && nft_set_is_anonymous(nft_trans_set(trans))) list_add_tail(&binding->binding_list, &nft_net->binding_list); list_add_tail(&trans_set->list_trans_newset, &nft_net->commit_set_list); break; case NFT_MSG_NEWCHAIN: if (!nft_trans_chain_update(trans) && nft_chain_binding(nft_trans_chain(trans))) list_add_tail(&binding->binding_list, &nft_net->binding_list); break; } } static void nft_trans_commit_list_add_elem(struct net *net, struct nft_trans *trans, gfp_t gfp) { struct nftables_pernet *nft_net = nft_pernet(net); WARN_ON_ONCE(trans->msg_type != NFT_MSG_NEWSETELEM && trans->msg_type != NFT_MSG_DELSETELEM); might_alloc(gfp); if (nft_trans_try_collapse(nft_net, trans, gfp)) { kfree(trans); return; } nft_trans_commit_list_add_tail(net, trans); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table)); if (trans == NULL) return -ENOMEM; if (msg_type == NFT_MSG_NEWTABLE) nft_activate_next(ctx->net, ctx->table); nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } static int nft_deltable(struct nft_ctx *ctx) { int err; err = nft_trans_table_add(ctx, NFT_MSG_DELTABLE); if (err < 0) return err; nft_deactivate_next(ctx->net, ctx->table); return err; } static struct nft_trans * nft_trans_alloc_chain(const struct nft_ctx *ctx, int msg_type) { struct nft_trans_chain *trans_chain; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain)); if (!trans) return NULL; trans_chain = nft_trans_container_chain(trans); INIT_LIST_HEAD(&trans_chain->nft_trans_binding.binding_list); trans_chain->chain = ctx->chain; return trans; } static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) { struct nft_trans *trans; trans = nft_trans_alloc_chain(ctx, msg_type); if (trans == NULL) return ERR_PTR(-ENOMEM); if (msg_type == NFT_MSG_NEWCHAIN) { nft_activate_next(ctx->net, ctx->chain); if (ctx->nla[NFTA_CHAIN_ID]) { nft_trans_chain_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID])); } } nft_trans_commit_list_add_tail(ctx->net, trans); return trans; } static int nft_delchain(struct nft_ctx *ctx) { struct nft_trans *trans; trans = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN); if (IS_ERR(trans)) return PTR_ERR(trans); nft_use_dec(&ctx->table->use); nft_deactivate_next(ctx->net, ctx->chain); return 0; } void nft_rule_expr_activate(const struct nft_ctx 
*ctx, struct nft_rule *rule) { struct nft_expr *expr; expr = nft_expr_first(rule); while (nft_expr_more(rule, expr)) { if (expr->ops->activate) expr->ops->activate(ctx, expr); expr = nft_expr_next(expr); } } void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule, enum nft_trans_phase phase) { struct nft_expr *expr; expr = nft_expr_first(rule); while (nft_expr_more(rule, expr)) { if (expr->ops->deactivate) expr->ops->deactivate(ctx, expr, phase); expr = nft_expr_next(expr); } } static int nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) { /* You cannot delete the same rule twice */ if (nft_is_active_next(ctx->net, rule)) { nft_deactivate_next(ctx->net, rule); nft_use_dec(&ctx->chain->use); return 0; } return -ENOENT; } static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, struct nft_rule *rule) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule)); if (trans == NULL) return NULL; if (msg_type == NFT_MSG_NEWRULE && ctx->nla[NFTA_RULE_ID] != NULL) { nft_trans_rule_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); } nft_trans_rule(trans) = rule; nft_trans_rule_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); return trans; } static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule) { struct nft_flow_rule *flow; struct nft_trans *trans; int err; trans = nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule); if (trans == NULL) return -ENOMEM; if (ctx->chain->flags & NFT_CHAIN_HW_OFFLOAD) { flow = nft_flow_rule_create(ctx->net, rule); if (IS_ERR(flow)) { nft_trans_destroy(trans); return PTR_ERR(flow); } nft_trans_flow_rule(trans) = flow; } err = nf_tables_delrule_deactivate(ctx, rule); if (err < 0) { nft_trans_destroy(trans); return err; } nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE); return 0; } static int nft_delrule_by_chain(struct nft_ctx *ctx) { struct nft_rule *rule; int err; list_for_each_entry(rule, &ctx->chain->rules, list) { if (!nft_is_active_next(ctx->net, rule)) continue; err = nft_delrule(ctx, rule); if (err < 0) return err; } return 0; } static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, struct nft_set *set, const struct nft_set_desc *desc) { struct nft_trans_set *trans_set; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set)); if (trans == NULL) return -ENOMEM; trans_set = nft_trans_container_set(trans); INIT_LIST_HEAD(&trans_set->nft_trans_binding.binding_list); INIT_LIST_HEAD(&trans_set->list_trans_newset); if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { nft_trans_set_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); nft_activate_next(ctx->net, set); } nft_trans_set(trans) = set; if (desc) { nft_trans_set_update(trans) = true; nft_trans_set_gc_int(trans) = desc->gc_int; nft_trans_set_timeout(trans) = desc->timeout; nft_trans_set_size(trans) = desc->size; } nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, struct nft_set *set) { return __nft_trans_set_add(ctx, msg_type, set, NULL); } static int nft_mapelem_deactivate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (!nft_set_elem_active(ext, iter->genmask)) return 0; nft_set_elem_change_active(ctx->net, set, ext); nft_setelem_data_deactivate(ctx->net, set, elem_priv); return 0; 
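/* NB: the element stays queued in the set backend; only its
 * next-generation active bit and the references held by its data
 * (bound chains, objects) have been dropped.
 */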
} struct nft_set_elem_catchall { struct list_head list; struct rcu_head rcu; struct nft_elem_priv *elem; }; static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, struct nft_set *set) { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; struct nft_set_ext *ext; list_for_each_entry(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_active(ext, genmask)) continue; nft_set_elem_change_active(ctx->net, set, ext); nft_setelem_data_deactivate(ctx->net, set, catchall->elem); break; } } static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) { struct nft_set_iter iter = { .genmask = nft_genmask_next(ctx->net), .type = NFT_ITER_UPDATE, .fn = nft_mapelem_deactivate, }; set->ops->walk(ctx, set, &iter); WARN_ON_ONCE(iter.err); nft_map_catchall_deactivate(ctx, set); } static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) { int err; err = nft_trans_set_add(ctx, NFT_MSG_DELSET, set); if (err < 0) return err; if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) nft_map_deactivate(ctx, set); nft_deactivate_next(ctx->net, set); nft_use_dec(&ctx->table->use); return err; } static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type, struct nft_object *obj) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_obj)); if (trans == NULL) return -ENOMEM; if (msg_type == NFT_MSG_NEWOBJ) nft_activate_next(ctx->net, obj); nft_trans_obj(trans) = obj; nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) { int err; err = nft_trans_obj_add(ctx, NFT_MSG_DELOBJ, obj); if (err < 0) return err; nft_deactivate_next(ctx->net, obj); nft_use_dec(&ctx->table->use); return err; } static struct nft_trans * nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, struct nft_flowtable *flowtable) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_flowtable)); if (trans == NULL) return ERR_PTR(-ENOMEM); if (msg_type == NFT_MSG_NEWFLOWTABLE) nft_activate_next(ctx->net, flowtable); INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); nft_trans_flowtable(trans) = flowtable; nft_trans_commit_list_add_tail(ctx->net, trans); return trans; } static int nft_delflowtable(struct nft_ctx *ctx, struct nft_flowtable *flowtable) { struct nft_trans *trans; trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); if (IS_ERR(trans)) return PTR_ERR(trans); nft_deactivate_next(ctx->net, flowtable); nft_use_dec(&ctx->table->use); return 0; } static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg) { int i; for (i = track->regs[dreg].num_reg; i > 0; i--) __nft_reg_track_cancel(track, dreg - i); } static void __nft_reg_track_update(struct nft_regs_track *track, const struct nft_expr *expr, u8 dreg, u8 num_reg) { track->regs[dreg].selector = expr; track->regs[dreg].bitwise = NULL; track->regs[dreg].num_reg = num_reg; } void nft_reg_track_update(struct nft_regs_track *track, const struct nft_expr *expr, u8 dreg, u8 len) { unsigned int regcount; int i; __nft_reg_track_clobber(track, dreg); regcount = DIV_ROUND_UP(len, NFT_REG32_SIZE); for (i = 0; i < regcount; i++, dreg++) __nft_reg_track_update(track, expr, dreg, i); } EXPORT_SYMBOL_GPL(nft_reg_track_update); void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len) { unsigned int regcount; int i; __nft_reg_track_clobber(track, dreg); regcount = DIV_ROUND_UP(len, NFT_REG32_SIZE); for 
(i = 0; i < regcount; i++, dreg++) __nft_reg_track_cancel(track, dreg); } EXPORT_SYMBOL_GPL(nft_reg_track_cancel); void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg) { track->regs[dreg].selector = NULL; track->regs[dreg].bitwise = NULL; track->regs[dreg].num_reg = 0; } EXPORT_SYMBOL_GPL(__nft_reg_track_cancel); /* * Tables */ static struct nft_table *nft_table_lookup(const struct net *net, const struct nlattr *nla, u8 family, u8 genmask, u32 nlpid) { struct nftables_pernet *nft_net; struct nft_table *table; if (nla == NULL) return ERR_PTR(-EINVAL); nft_net = nft_pernet(net); list_for_each_entry_rcu(table, &nft_net->tables, list, lockdep_is_held(&nft_net->commit_mutex)) { if (!nla_strcmp(nla, table->name) && table->family == family && nft_active_genmask(table, genmask)) { if (nft_table_has_owner(table) && nlpid && table->nlpid != nlpid) return ERR_PTR(-EPERM); return table; } } return ERR_PTR(-ENOENT); } static struct nft_table *nft_table_lookup_byhandle(const struct net *net, const struct nlattr *nla, int family, u8 genmask, u32 nlpid) { struct nftables_pernet *nft_net; struct nft_table *table; nft_net = nft_pernet(net); list_for_each_entry(table, &nft_net->tables, list) { if (be64_to_cpu(nla_get_be64(nla)) == table->handle && table->family == family && nft_active_genmask(table, genmask)) { if (nft_table_has_owner(table) && nlpid && table->nlpid != nlpid) return ERR_PTR(-EPERM); return table; } } return ERR_PTR(-ENOENT); } static inline u64 nf_tables_alloc_handle(struct nft_table *table) { return ++table->hgenerator; } static const struct nft_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX]; static const struct nft_chain_type * __nft_chain_type_get(u8 family, enum nft_chain_types type) { if (family >= NFPROTO_NUMPROTO || type >= NFT_CHAIN_T_MAX) return NULL; return chain_type[family][type]; } static const struct nft_chain_type * __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family) { const struct nft_chain_type *type; int i; for (i = 0; i < NFT_CHAIN_T_MAX; i++) { type = __nft_chain_type_get(family, i); if (!type) continue; if (!nla_strcmp(nla, type->name)) return type; } return NULL; } struct nft_module_request { struct list_head list; char module[MODULE_NAME_LEN]; bool done; }; #ifdef CONFIG_MODULES __printf(2, 3) int nft_request_module(struct net *net, const char *fmt, ...) { char module_name[MODULE_NAME_LEN]; struct nftables_pernet *nft_net; struct nft_module_request *req; va_list args; int ret; va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); va_end(args); if (ret >= MODULE_NAME_LEN) return 0; nft_net = nft_pernet(net); list_for_each_entry(req, &nft_net->module_list, list) { if (!strcmp(req->module, module_name)) { if (req->done) return 0; /* A request to load this module already exists. 
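 * Returning -EAGAIN makes the caller abort the current batch so it
 * can be replayed once the module load has been attempted.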
*/ return -EAGAIN; } } req = kmalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; req->done = false; strscpy(req->module, module_name, MODULE_NAME_LEN); list_add_tail(&req->list, &nft_net->module_list); return -EAGAIN; } EXPORT_SYMBOL_GPL(nft_request_module); #endif static void lockdep_nfnl_nft_mutex_not_held(void) { #ifdef CONFIG_PROVE_LOCKING if (debug_locks) WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); #endif } static const struct nft_chain_type * nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla, u8 family, bool autoload) { const struct nft_chain_type *type; type = __nf_tables_chain_type_lookup(nla, family); if (type != NULL) return type; lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES if (autoload) { if (nft_request_module(net, "nft-chain-%u-%.*s", family, nla_len(nla), (const char *)nla_data(nla)) == -EAGAIN) return ERR_PTR(-EAGAIN); } #endif return ERR_PTR(-ENOENT); } static __be16 nft_base_seq(const struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); return htons(nft_net->base_seq & 0xffff); } static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, [NFTA_TABLE_HANDLE] = { .type = NLA_U64 }, [NFTA_TABLE_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN } }; static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, const struct nft_table *table) { struct nlmsghdr *nlh; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) || nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle), NFTA_TABLE_PAD)) goto nla_put_failure; if (event == NFT_MSG_DELTABLE) { nlmsg_end(skb, nlh); return 0; } if (nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags & NFT_TABLE_F_MASK))) goto nla_put_failure; if (nft_table_has_owner(table) && nla_put_be32(skb, NFTA_TABLE_OWNER, htonl(table->nlpid))) goto nla_put_failure; if (table->udata) { if (nla_put(skb, NFTA_TABLE_USERDATA, table->udlen, table->udata)) goto nla_put_failure; } nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_trim(skb, nlh); return -1; } struct nftnl_skb_parms { bool report; }; #define NFT_CB(skb) (*(struct nftnl_skb_parms*)&((skb)->cb)) static void nft_notify_enqueue(struct sk_buff *skb, bool report, struct list_head *notify_list) { NFT_CB(skb).report = report; list_add_tail(&skb->list, notify_list); } static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct nftables_pernet *nft_net; struct sk_buff *skb; u16 flags = 0; int err; if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, ctx->family, ctx->table); if (err < 0) { kfree_skb(skb); goto err; } nft_net = nft_pernet(ctx->net); nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } static int nf_tables_dump_tables(struct sk_buff *skb, struct netlink_callback *cb) { const 
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct nftables_pernet *nft_net; const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; rcu_read_lock(); nft_net = nft_pernet(net); cb->seq = READ_ONCE(nft_net->base_seq); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; if (idx < s_idx) goto cont; if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); if (!nft_is_active(net, table)) continue; if (nf_tables_fill_table_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWTABLE, NLM_F_MULTI, table->family, table) < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } done: rcu_read_unlock(); cb->args[0] = idx; return skb->len; } static int nft_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, struct netlink_dump_control *c) { int err; if (!try_module_get(THIS_MODULE)) return -EINVAL; rcu_read_unlock(); err = netlink_dump_start(nlsk, skb, nlh, c); rcu_read_lock(); module_put(THIS_MODULE); return err; } /* called with rcu_read_lock held */ static int nf_tables_gettable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; const struct nft_table *table; struct net *net = info->net; struct sk_buff *skb2; int err; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nf_tables_dump_tables, .module = THIS_MODULE, }; return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_TABLE_NAME]); return PTR_ERR(table); } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) return -ENOMEM; err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0, family, table); if (err < 0) goto err_fill_table_info; return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); err_fill_table_info: kfree_skb(skb2); return err; } static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt) { struct nft_chain *chain; u32 i = 0; list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) continue; if (!nft_is_base_chain(chain)) continue; if (cnt && i++ == cnt) break; nf_tables_unregister_hook(net, table, chain); } } static int nf_tables_table_enable(struct net *net, struct nft_table *table) { struct nft_chain *chain; int err, i = 0; list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) continue; if (!nft_is_base_chain(chain)) continue; err = nf_tables_register_hook(net, table, chain); if (err < 0) goto err_register_hooks; i++; } return 0; err_register_hooks: if (i) nft_table_disable(net, table, i); return err; } static void nf_tables_table_disable(struct net *net, struct nft_table *table) { table->flags &= ~NFT_TABLE_F_DORMANT; nft_table_disable(net, table, 0); table->flags |= NFT_TABLE_F_DORMANT; } #define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1) #define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0) #define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1) #define __NFT_TABLE_F_WAS_ORPHAN (__NFT_TABLE_F_INTERNAL << 2) #define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ 
__NFT_TABLE_F_WAS_AWAKEN | \ __NFT_TABLE_F_WAS_ORPHAN) static bool nft_table_pending_update(const struct nft_ctx *ctx) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); struct nft_trans *trans; if (ctx->table->flags & __NFT_TABLE_F_UPDATE) return true; list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->table == ctx->table && ((trans->msg_type == NFT_MSG_NEWCHAIN && nft_trans_chain_update(trans)) || (trans->msg_type == NFT_MSG_DELCHAIN && nft_is_base_chain(nft_trans_chain(trans))))) return true; } return false; } static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; u32 flags; int ret; if (!ctx->nla[NFTA_TABLE_FLAGS]) return 0; flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS])); if (flags & ~NFT_TABLE_F_MASK) return -EOPNOTSUPP; if (flags == (ctx->table->flags & NFT_TABLE_F_MASK)) return 0; if ((nft_table_has_owner(ctx->table) && !(flags & NFT_TABLE_F_OWNER)) || (flags & NFT_TABLE_F_OWNER && !nft_table_is_orphan(ctx->table))) return -EOPNOTSUPP; if ((flags ^ ctx->table->flags) & NFT_TABLE_F_PERSIST) return -EOPNOTSUPP; /* No dormant off/on/off/on games in single transaction */ if (nft_table_pending_update(ctx)) return -EINVAL; trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) return -ENOMEM; if ((flags & NFT_TABLE_F_DORMANT) && !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { ctx->table->flags |= NFT_TABLE_F_DORMANT; if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { ctx->table->flags &= ~NFT_TABLE_F_DORMANT; if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) { ret = nf_tables_table_enable(ctx->net, ctx->table); if (ret < 0) goto err_register_hooks; ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT; } } if ((flags & NFT_TABLE_F_OWNER) && !nft_table_has_owner(ctx->table)) { ctx->table->nlpid = ctx->portid; ctx->table->flags |= NFT_TABLE_F_OWNER | __NFT_TABLE_F_WAS_ORPHAN; } nft_trans_table_update(trans) = true; nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err_register_hooks: ctx->table->flags |= NFT_TABLE_F_DORMANT; nft_trans_destroy(trans); return ret; } static u32 nft_chain_hash(const void *data, u32 len, u32 seed) { const char *name = data; return jhash(name, strlen(name), seed); } static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed) { const struct nft_chain *chain = data; return nft_chain_hash(chain->name, 0, seed); } static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { const struct nft_chain *chain = ptr; const char *name = arg->key; return strcmp(chain->name, name); } static u32 nft_objname_hash(const void *data, u32 len, u32 seed) { const struct nft_object_hash_key *k = data; seed ^= hash_ptr(k->table, 32); return jhash(k->name, strlen(k->name), seed); } static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed) { const struct nft_object *obj = data; return nft_objname_hash(&obj->key, 0, seed); } static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { const struct nft_object_hash_key *k = arg->key; const struct nft_object *obj = ptr; if (obj->key.table != k->table) return -1; return strcmp(obj->key.name, k->name); } static bool nft_supported_family(u8 family) { return false #ifdef CONFIG_NF_TABLES_INET || family == NFPROTO_INET #endif #ifdef CONFIG_NF_TABLES_IPV4 || family == NFPROTO_IPV4 #endif #ifdef CONFIG_NF_TABLES_ARP || family == NFPROTO_ARP #endif #ifdef 
CONFIG_NF_TABLES_NETDEV || family == NFPROTO_NETDEV #endif #if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) || family == NFPROTO_BRIDGE #endif #ifdef CONFIG_NF_TABLES_IPV6 || family == NFPROTO_IPV6 #endif ; } static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct nftables_pernet *nft_net = nft_pernet(info->net); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; struct nft_ctx ctx; u32 flags = 0; int err; if (!nft_supported_family(family)) return -EOPNOTSUPP; lockdep_assert_held(&nft_net->commit_mutex); attr = nla[NFTA_TABLE_NAME]; table = nft_table_lookup(net, attr, family, genmask, NETLINK_CB(skb).portid); if (IS_ERR(table)) { if (PTR_ERR(table) != -ENOENT) return PTR_ERR(table); } else { if (info->nlh->nlmsg_flags & NLM_F_EXCL) { NL_SET_BAD_ATTR(extack, attr); return -EEXIST; } if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); return nf_tables_updtable(&ctx); } if (nla[NFTA_TABLE_FLAGS]) { flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); if (flags & ~NFT_TABLE_F_MASK) return -EOPNOTSUPP; } err = -ENOMEM; table = kzalloc(sizeof(*table), GFP_KERNEL_ACCOUNT); if (table == NULL) goto err_kzalloc; table->validate_state = nft_net->validate_state; table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT); if (table->name == NULL) goto err_strdup; if (nla[NFTA_TABLE_USERDATA]) { table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL_ACCOUNT); if (table->udata == NULL) goto err_table_udata; table->udlen = nla_len(nla[NFTA_TABLE_USERDATA]); } err = rhltable_init(&table->chains_ht, &nft_chain_ht_params); if (err) goto err_chain_ht; INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); INIT_LIST_HEAD(&table->objects); INIT_LIST_HEAD(&table->flowtables); table->family = family; table->flags = flags; table->handle = ++nft_net->table_handle; if (table->flags & NFT_TABLE_F_OWNER) table->nlpid = NETLINK_CB(skb).portid; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); if (err < 0) goto err_trans; list_add_tail_rcu(&table->list, &nft_net->tables); return 0; err_trans: rhltable_destroy(&table->chains_ht); err_chain_ht: kfree(table->udata); err_table_udata: kfree(table->name); err_strdup: kfree(table); err_kzalloc: return err; } static int nft_flush_table(struct nft_ctx *ctx) { struct nft_flowtable *flowtable, *nft; struct nft_chain *chain, *nc; struct nft_object *obj, *ne; struct nft_set *set, *ns; int err; list_for_each_entry(chain, &ctx->table->chains, list) { if (!nft_is_active_next(ctx->net, chain)) continue; if (nft_chain_binding(chain)) continue; ctx->chain = chain; err = nft_delrule_by_chain(ctx); if (err < 0) goto out; } list_for_each_entry_safe(set, ns, &ctx->table->sets, list) { if (!nft_is_active_next(ctx->net, set)) continue; if (nft_set_is_anonymous(set)) continue; err = nft_delset(ctx, set); if (err < 0) goto out; } list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) { if (!nft_is_active_next(ctx->net, flowtable)) continue; err = nft_delflowtable(ctx, flowtable); if (err < 0) goto out; } list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) { if (!nft_is_active_next(ctx->net, obj)) continue; err = nft_delobj(ctx, obj); if (err < 0) goto out; } list_for_each_entry_safe(chain, nc, 
&ctx->table->chains, list) { if (!nft_is_active_next(ctx->net, chain)) continue; if (nft_chain_binding(chain)) continue; ctx->chain = chain; err = nft_delchain(ctx); if (err < 0) goto out; } err = nft_deltable(ctx); out: return err; } static int nft_flush(struct nft_ctx *ctx, int family) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); const struct nlattr * const *nla = ctx->nla; struct nft_table *table, *nt; int err = 0; list_for_each_entry_safe(table, nt, &nft_net->tables, list) { if (family != AF_UNSPEC && table->family != family) continue; ctx->family = table->family; if (!nft_is_active_next(ctx->net, table)) continue; if (nft_table_has_owner(table) && table->nlpid != ctx->portid) continue; if (nla[NFTA_TABLE_NAME] && nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0) continue; ctx->table = table; err = nft_flush_table(ctx); if (err < 0) goto out; } out: return err; } static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; struct nft_ctx ctx; nft_ctx_init(&ctx, net, skb, info->nlh, 0, NULL, NULL, nla); if (family == AF_UNSPEC || (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE])) return nft_flush(&ctx, family); if (nla[NFTA_TABLE_HANDLE]) { attr = nla[NFTA_TABLE_HANDLE]; table = nft_table_lookup_byhandle(net, attr, family, genmask, NETLINK_CB(skb).portid); } else { attr = nla[NFTA_TABLE_NAME]; table = nft_table_lookup(net, attr, family, genmask, NETLINK_CB(skb).portid); } if (IS_ERR(table)) { if (PTR_ERR(table) == -ENOENT && NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYTABLE) return 0; NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(table); } if (info->nlh->nlmsg_flags & NLM_F_NONREC && table->use > 0) return -EBUSY; ctx.family = family; ctx.table = table; return nft_flush_table(&ctx); } static void nf_tables_table_destroy(struct nft_table *table) { if (WARN_ON(table->use > 0)) return; rhltable_destroy(&table->chains_ht); kfree(table->name); kfree(table->udata); kfree(table); } void nft_register_chain_type(const struct nft_chain_type *ctype) { nfnl_lock(NFNL_SUBSYS_NFTABLES); if (WARN_ON(__nft_chain_type_get(ctype->family, ctype->type))) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); return; } chain_type[ctype->family][ctype->type] = ctype; nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_register_chain_type); void nft_unregister_chain_type(const struct nft_chain_type *ctype) { nfnl_lock(NFNL_SUBSYS_NFTABLES); chain_type[ctype->family][ctype->type] = NULL; nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_chain_type); /* * Chains */ static struct nft_chain * nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask) { struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) { if (chain->handle == handle && nft_active_genmask(chain, genmask)) return chain; } return ERR_PTR(-ENOENT); } static bool lockdep_commit_lock_is_held(const struct net *net) { #ifdef CONFIG_PROVE_LOCKING struct nftables_pernet *nft_net = nft_pernet(net); return lockdep_is_held(&nft_net->commit_mutex); #else return true; #endif } static struct nft_chain *nft_chain_lookup(struct net *net, struct nft_table *table, const struct nlattr *nla, u8 genmask) { char search[NFT_CHAIN_MAXNAMELEN + 1]; struct rhlist_head *tmp, *list; struct nft_chain *chain; if (nla == NULL) return 
ERR_PTR(-EINVAL); nla_strscpy(search, nla, sizeof(search)); WARN_ON(!rcu_read_lock_held() && !lockdep_commit_lock_is_held(net)); chain = ERR_PTR(-ENOENT); rcu_read_lock(); list = rhltable_lookup(&table->chains_ht, search, nft_chain_ht_params); if (!list) goto out_unlock; rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) { if (nft_active_genmask(chain, genmask)) goto out_unlock; } chain = ERR_PTR(-ENOENT); out_unlock: rcu_read_unlock(); return chain; } static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { [NFTA_CHAIN_TABLE] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_CHAIN_HANDLE] = { .type = NLA_U64 }, [NFTA_CHAIN_NAME] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, [NFTA_CHAIN_POLICY] = { .type = NLA_U32 }, [NFTA_CHAIN_TYPE] = { .type = NLA_STRING, .len = NFT_MODULE_AUTOLOAD_LIMIT }, [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, [NFTA_CHAIN_FLAGS] = { .type = NLA_U32 }, [NFTA_CHAIN_ID] = { .type = NLA_U32 }, [NFTA_CHAIN_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, }; static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 }, [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 }, [NFTA_HOOK_DEV] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, }; static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) { struct nft_stats *cpu_stats, total; struct nlattr *nest; unsigned int seq; u64 pkts, bytes; int cpu; if (!stats) return 0; memset(&total, 0, sizeof(total)); for_each_possible_cpu(cpu) { cpu_stats = per_cpu_ptr(stats, cpu); do { seq = u64_stats_fetch_begin(&cpu_stats->syncp); pkts = cpu_stats->pkts; bytes = cpu_stats->bytes; } while (u64_stats_fetch_retry(&cpu_stats->syncp, seq)); total.pkts += pkts; total.bytes += bytes; } nest = nla_nest_start_noflag(skb, NFTA_CHAIN_COUNTERS); if (nest == NULL) goto nla_put_failure; if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts), NFTA_COUNTER_PAD) || nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes), NFTA_COUNTER_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); return 0; nla_put_failure: return -ENOSPC; } static int nft_dump_basechain_hook(struct sk_buff *skb, const struct net *net, int family, const struct nft_base_chain *basechain, const struct list_head *hook_list) { const struct nf_hook_ops *ops = &basechain->ops; struct nft_hook *hook, *first = NULL; struct nlattr *nest, *nest_devs; int n = 0; nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK); if (nest == NULL) goto nla_put_failure; if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) goto nla_put_failure; if (nft_base_chain_netdev(family, ops->hooknum)) { nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS); if (!nest_devs) goto nla_put_failure; if (!hook_list) hook_list = &basechain->hook_list; list_for_each_entry_rcu(hook, hook_list, list, lockdep_commit_lock_is_held(net)) { if (!first) first = hook; if (nla_put(skb, NFTA_DEVICE_NAME, hook->ifnamelen, hook->ifname)) goto nla_put_failure; n++; } nla_nest_end(skb, nest_devs); if (n == 1 && nla_put(skb, NFTA_HOOK_DEV, first->ifnamelen, first->ifname)) goto nla_put_failure; } nla_nest_end(skb, nest); return 0; nla_put_failure: return -1; } static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, const struct nft_table *table, const struct nft_chain *chain, const 
struct list_head *hook_list) { struct nlmsghdr *nlh; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name) || nla_put_string(skb, NFTA_CHAIN_NAME, chain->name) || nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle), NFTA_CHAIN_PAD)) goto nla_put_failure; if (event == NFT_MSG_DELCHAIN && !hook_list) { nlmsg_end(skb, nlh); return 0; } if (nft_is_base_chain(chain)) { const struct nft_base_chain *basechain = nft_base_chain(chain); struct nft_stats __percpu *stats; if (nft_dump_basechain_hook(skb, net, family, basechain, hook_list)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CHAIN_POLICY, htonl(basechain->policy))) goto nla_put_failure; if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name)) goto nla_put_failure; stats = rcu_dereference_check(basechain->stats, lockdep_commit_lock_is_held(net)); if (nft_dump_stats(skb, stats)) goto nla_put_failure; } if (chain->flags && nla_put_be32(skb, NFTA_CHAIN_FLAGS, htonl(chain->flags))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) goto nla_put_failure; if (chain->udata && nla_put(skb, NFTA_CHAIN_USERDATA, chain->udlen, chain->udata)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_trim(skb, nlh); return -1; } static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event, const struct list_head *hook_list) { struct nftables_pernet *nft_net; struct sk_buff *skb; u16 flags = 0; int err; if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, ctx->family, ctx->table, ctx->chain, hook_list); if (err < 0) { kfree_skb(skb); goto err; } nft_net = nft_pernet(ctx->net); nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } static int nf_tables_dump_chains(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nftables_pernet *nft_net; const struct nft_table *table; const struct nft_chain *chain; rcu_read_lock(); nft_net = nft_pernet(net); cb->seq = READ_ONCE(nft_net->base_seq); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; list_for_each_entry_rcu(chain, &table->chains, list) { if (idx < s_idx) goto cont; if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); if (!nft_is_active(net, chain)) continue; if (nf_tables_fill_chain_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, NLM_F_MULTI, table->family, table, chain, NULL) < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } } done: rcu_read_unlock(); cb->args[0] = idx; return skb->len; } /* called with rcu_read_lock held */ static int nf_tables_getchain(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = 
info->nfmsg->nfgen_family; const struct nft_chain *chain; struct net *net = info->net; struct nft_table *table; struct sk_buff *skb2; int err; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nf_tables_dump_chains, .module = THIS_MODULE, }; return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]); return PTR_ERR(table); } chain = nft_chain_lookup(net, table, nla[NFTA_CHAIN_NAME], genmask); if (IS_ERR(chain)) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]); return PTR_ERR(chain); } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) return -ENOMEM; err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0, family, table, chain, NULL); if (err < 0) goto err_fill_chain_info; return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); err_fill_chain_info: kfree_skb(skb2); return err; } static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, }; static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) { struct nlattr *tb[NFTA_COUNTER_MAX+1]; struct nft_stats __percpu *newstats; struct nft_stats *stats; int err; err = nla_parse_nested_deprecated(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy, NULL); if (err < 0) return ERR_PTR_PCPU(err); if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS]) return ERR_PTR_PCPU(-EINVAL); newstats = netdev_alloc_pcpu_stats(struct nft_stats); if (newstats == NULL) return ERR_PTR_PCPU(-ENOMEM); /* Restore old counters on this cpu, no problem. Per-cpu statistics * are not exposed to userspace. 
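 * individually; nft_dump_stats() folds the per-cpu values into a
 * single total over all possible CPUs.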
*/ preempt_disable(); stats = this_cpu_ptr(newstats); stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); preempt_enable(); return newstats; } static void nft_chain_stats_replace(struct nft_trans_chain *trans) { const struct nft_trans *t = &trans->nft_trans_binding.nft_trans; struct nft_base_chain *chain = nft_base_chain(trans->chain); if (!trans->stats) return; trans->stats = rcu_replace_pointer(chain->stats, trans->stats, lockdep_commit_lock_is_held(t->net)); if (!trans->stats) static_branch_inc(&nft_counters_enabled); } static void nf_tables_chain_free_chain_rules(struct nft_chain *chain) { struct nft_rule_blob *g0 = rcu_dereference_raw(chain->blob_gen_0); struct nft_rule_blob *g1 = rcu_dereference_raw(chain->blob_gen_1); if (g0 != g1) kvfree(g1); kvfree(g0); /* should be NULL either via abort or via successful commit */ WARN_ON_ONCE(chain->blob_next); kvfree(chain->blob_next); } void nf_tables_chain_destroy(struct nft_chain *chain) { const struct nft_table *table = chain->table; struct nft_hook *hook, *next; if (WARN_ON(chain->use > 0)) return; /* no concurrent access possible anymore */ nf_tables_chain_free_chain_rules(chain); if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain); if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { list_for_each_entry_safe(hook, next, &basechain->hook_list, list) { list_del_rcu(&hook->list); nft_netdev_hook_free_rcu(hook); } } module_put(basechain->type->owner); if (rcu_access_pointer(basechain->stats)) { static_branch_dec(&nft_counters_enabled); free_percpu(rcu_dereference_raw(basechain->stats)); } kfree(chain->name); kfree(chain->udata); kfree(basechain); } else { kfree(chain->name); kfree(chain->udata); kfree(chain); } } static struct nft_hook *nft_netdev_hook_alloc(struct net *net, const struct nlattr *attr) { struct nf_hook_ops *ops; struct net_device *dev; struct nft_hook *hook; int err; hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); if (!hook) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&hook->ops_list); err = nla_strscpy(hook->ifname, attr, IFNAMSIZ); if (err < 0) goto err_hook_free; hook->ifnamelen = nla_len(attr); /* nf_tables_netdev_event() is called under rtnl_mutex, this is * indirectly serializing all the other holders of the commit_mutex with * the rtnl_mutex. 
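 * In practice that means nf_tables' netdevice notifier cannot update
 * these hook lists while the commit_mutex is held here.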
*/ for_each_netdev(net, dev) { if (strncmp(dev->name, hook->ifname, hook->ifnamelen)) continue; ops = kzalloc(sizeof(struct nf_hook_ops), GFP_KERNEL_ACCOUNT); if (!ops) { err = -ENOMEM; goto err_hook_free; } ops->dev = dev; list_add_tail(&ops->list, &hook->ops_list); } return hook; err_hook_free: nft_netdev_hook_free(hook); return ERR_PTR(err); } static struct nft_hook *nft_hook_list_find(struct list_head *hook_list, const struct nft_hook *this) { struct nft_hook *hook; list_for_each_entry(hook, hook_list, list) { if (!strncmp(hook->ifname, this->ifname, min(hook->ifnamelen, this->ifnamelen))) return hook; } return NULL; } static int nf_tables_parse_netdev_hooks(struct net *net, const struct nlattr *attr, struct list_head *hook_list, struct netlink_ext_ack *extack) { struct nft_hook *hook, *next; const struct nlattr *tmp; int rem, n = 0, err; nla_for_each_nested(tmp, attr, rem) { if (nla_type(tmp) != NFTA_DEVICE_NAME) { err = -EINVAL; goto err_hook; } hook = nft_netdev_hook_alloc(net, tmp); if (IS_ERR(hook)) { NL_SET_BAD_ATTR(extack, tmp); err = PTR_ERR(hook); goto err_hook; } if (nft_hook_list_find(hook_list, hook)) { NL_SET_BAD_ATTR(extack, tmp); nft_netdev_hook_free(hook); err = -EEXIST; goto err_hook; } list_add_tail(&hook->list, hook_list); n++; if (n == NFT_NETDEVICE_MAX) { err = -EFBIG; goto err_hook; } } return 0; err_hook: list_for_each_entry_safe(hook, next, hook_list, list) { list_del(&hook->list); nft_netdev_hook_free(hook); } return err; } struct nft_chain_hook { u32 num; s32 priority; const struct nft_chain_type *type; struct list_head list; }; static int nft_chain_parse_netdev(struct net *net, struct nlattr *tb[], struct list_head *hook_list, struct netlink_ext_ack *extack, u32 flags) { struct nft_hook *hook; int err; if (tb[NFTA_HOOK_DEV]) { hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV]); if (IS_ERR(hook)) { NL_SET_BAD_ATTR(extack, tb[NFTA_HOOK_DEV]); return PTR_ERR(hook); } list_add_tail(&hook->list, hook_list); } else if (tb[NFTA_HOOK_DEVS]) { err = nf_tables_parse_netdev_hooks(net, tb[NFTA_HOOK_DEVS], hook_list, extack); if (err < 0) return err; } if (flags & NFT_CHAIN_HW_OFFLOAD && list_empty(hook_list)) return -EINVAL; return 0; } static int nft_chain_parse_hook(struct net *net, struct nft_base_chain *basechain, const struct nlattr * const nla[], struct nft_chain_hook *hook, u8 family, u32 flags, struct netlink_ext_ack *extack) { struct nftables_pernet *nft_net = nft_pernet(net); struct nlattr *ha[NFTA_HOOK_MAX + 1]; const struct nft_chain_type *type; int err; lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); err = nla_parse_nested_deprecated(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], nft_hook_policy, NULL); if (err < 0) return err; if (!basechain) { if (!ha[NFTA_HOOK_HOOKNUM] || !ha[NFTA_HOOK_PRIORITY]) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]); return -ENOENT; } hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); type = __nft_chain_type_get(family, NFT_CHAIN_T_DEFAULT); if (!type) return -EOPNOTSUPP; if (nla[NFTA_CHAIN_TYPE]) { type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE], family, true); if (IS_ERR(type)) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]); return PTR_ERR(type); } } if (hook->num >= NFT_MAX_HOOKS || !(type->hook_mask & (1 << hook->num))) return -EOPNOTSUPP; if (type->type == NFT_CHAIN_T_NAT && hook->priority <= NF_IP_PRI_CONNTRACK) return -EOPNOTSUPP; } else { if (ha[NFTA_HOOK_HOOKNUM]) { hook->num = 
ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); if (hook->num != basechain->ops.hooknum) return -EOPNOTSUPP; } if (ha[NFTA_HOOK_PRIORITY]) { hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); if (hook->priority != basechain->ops.priority) return -EOPNOTSUPP; } if (nla[NFTA_CHAIN_TYPE]) { type = __nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE], family); if (!type) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]); return -ENOENT; } } else { type = basechain->type; } } if (!try_module_get(type->owner)) { if (nla[NFTA_CHAIN_TYPE]) NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]); return -ENOENT; } hook->type = type; INIT_LIST_HEAD(&hook->list); if (nft_base_chain_netdev(family, hook->num)) { err = nft_chain_parse_netdev(net, ha, &hook->list, extack, flags); if (err < 0) { module_put(type->owner); return err; } } else if (ha[NFTA_HOOK_DEV] || ha[NFTA_HOOK_DEVS]) { module_put(type->owner); return -EOPNOTSUPP; } return 0; } static void nft_chain_release_hook(struct nft_chain_hook *hook) { struct nft_hook *h, *next; list_for_each_entry_safe(h, next, &hook->list, list) { list_del(&h->list); nft_netdev_hook_free(h); } module_put(hook->type->owner); } static void nft_last_rule(const struct nft_chain *chain, const void *ptr) { struct nft_rule_dp_last *lrule; BUILD_BUG_ON(offsetof(struct nft_rule_dp_last, end) != 0); lrule = (struct nft_rule_dp_last *)ptr; lrule->end.is_last = 1; lrule->chain = chain; /* blob size does not include the trailer rule */ } static struct nft_rule_blob *nf_tables_chain_alloc_rules(const struct nft_chain *chain, unsigned int size) { struct nft_rule_blob *blob; if (size > INT_MAX) return NULL; size += sizeof(struct nft_rule_blob) + sizeof(struct nft_rule_dp_last); blob = kvmalloc(size, GFP_KERNEL_ACCOUNT); if (!blob) return NULL; blob->size = 0; nft_last_rule(chain, blob->data); return blob; } static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family, const struct nft_chain_hook *hook, struct nft_chain *chain) { ops->pf = family; ops->hooknum = hook->num; ops->priority = hook->priority; ops->priv = chain; ops->hook = hook->type->hooks[ops->hooknum]; ops->hook_ops_type = NF_HOOK_OP_NF_TABLES; } static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, struct nft_chain_hook *hook, u32 flags) { struct nft_chain *chain; struct nf_hook_ops *ops; struct nft_hook *h; basechain->type = hook->type; INIT_LIST_HEAD(&basechain->hook_list); chain = &basechain->chain; if (nft_base_chain_netdev(family, hook->num)) { list_splice_init(&hook->list, &basechain->hook_list); list_for_each_entry(h, &basechain->hook_list, list) { list_for_each_entry(ops, &h->ops_list, list) nft_basechain_hook_init(ops, family, hook, chain); } } nft_basechain_hook_init(&basechain->ops, family, hook, chain); chain->flags |= NFT_CHAIN_BASE | flags; basechain->policy = NF_ACCEPT; if (chain->flags & NFT_CHAIN_HW_OFFLOAD && !nft_chain_offload_support(basechain)) { list_splice_init(&basechain->hook_list, &hook->list); return -EOPNOTSUPP; } flow_block_init(&basechain->flow_block); return 0; } int nft_chain_add(struct nft_table *table, struct nft_chain *chain) { int err; err = rhltable_insert_key(&table->chains_ht, chain->name, &chain->rhlhead, nft_chain_ht_params); if (err) return err; list_add_tail_rcu(&chain->list, &table->chains); return 0; } static u64 chain_id; static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 policy, u32 flags, struct netlink_ext_ack *extack) { const struct nlattr * const *nla = ctx->nla; struct nft_table *table = ctx->table; struct nft_base_chain 
*basechain; struct net *net = ctx->net; char name[NFT_NAME_MAXLEN]; struct nft_rule_blob *blob; struct nft_trans *trans; struct nft_chain *chain; int err; if (nla[NFTA_CHAIN_HOOK]) { struct nft_stats __percpu *stats = NULL; struct nft_chain_hook hook = {}; if (table->flags & __NFT_TABLE_F_UPDATE) return -EINVAL; if (flags & NFT_CHAIN_BINDING) return -EOPNOTSUPP; err = nft_chain_parse_hook(net, NULL, nla, &hook, family, flags, extack); if (err < 0) return err; basechain = kzalloc(sizeof(*basechain), GFP_KERNEL_ACCOUNT); if (basechain == NULL) { nft_chain_release_hook(&hook); return -ENOMEM; } chain = &basechain->chain; if (nla[NFTA_CHAIN_COUNTERS]) { stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); if (IS_ERR_PCPU(stats)) { nft_chain_release_hook(&hook); kfree(basechain); return PTR_ERR_PCPU(stats); } rcu_assign_pointer(basechain->stats, stats); } err = nft_basechain_init(basechain, family, &hook, flags); if (err < 0) { nft_chain_release_hook(&hook); kfree(basechain); free_percpu(stats); return err; } if (stats) static_branch_inc(&nft_counters_enabled); } else { if (flags & NFT_CHAIN_BASE) return -EINVAL; if (flags & NFT_CHAIN_HW_OFFLOAD) return -EOPNOTSUPP; chain = kzalloc(sizeof(*chain), GFP_KERNEL_ACCOUNT); if (chain == NULL) return -ENOMEM; chain->flags = flags; } ctx->chain = chain; INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); chain->table = table; if (nla[NFTA_CHAIN_NAME]) { chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT); } else { if (!(flags & NFT_CHAIN_BINDING)) { err = -EINVAL; goto err_destroy_chain; } snprintf(name, sizeof(name), "__chain%llu", ++chain_id); chain->name = kstrdup(name, GFP_KERNEL_ACCOUNT); } if (!chain->name) { err = -ENOMEM; goto err_destroy_chain; } if (nla[NFTA_CHAIN_USERDATA]) { chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL_ACCOUNT); if (chain->udata == NULL) { err = -ENOMEM; goto err_destroy_chain; } chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]); } blob = nf_tables_chain_alloc_rules(chain, 0); if (!blob) { err = -ENOMEM; goto err_destroy_chain; } RCU_INIT_POINTER(chain->blob_gen_0, blob); RCU_INIT_POINTER(chain->blob_gen_1, blob); if (!nft_use_inc(&table->use)) { err = -EMFILE; goto err_destroy_chain; } trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto err_trans; } nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET; if (nft_is_base_chain(chain)) nft_trans_chain_policy(trans) = policy; err = nft_chain_add(table, chain); if (err < 0) goto err_chain_add; /* This must be LAST to ensure no packets are walking over this chain. 
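 * The chain is already linked into the table and its rule blob is
 * initialised, so traffic may hit it as soon as the hook is registered.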
*/ err = nf_tables_register_hook(net, table, chain); if (err < 0) goto err_register_hook; return 0; err_register_hook: nft_chain_del(chain); err_chain_add: nft_trans_destroy(trans); err_trans: nft_use_dec_restore(&table->use); err_destroy_chain: nf_tables_chain_destroy(chain); return err; } static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, u32 flags, const struct nlattr *attr, struct netlink_ext_ack *extack) { const struct nlattr * const *nla = ctx->nla; struct nft_base_chain *basechain = NULL; struct nft_table *table = ctx->table; struct nft_chain *chain = ctx->chain; struct nft_chain_hook hook = {}; struct nft_stats __percpu *stats = NULL; struct nft_hook *h, *next; struct nf_hook_ops *ops; struct nft_trans *trans; bool unregister = false; int err; if (chain->flags ^ flags) return -EOPNOTSUPP; INIT_LIST_HEAD(&hook.list); if (nla[NFTA_CHAIN_HOOK]) { if (!nft_is_base_chain(chain)) { NL_SET_BAD_ATTR(extack, attr); return -EEXIST; } basechain = nft_base_chain(chain); err = nft_chain_parse_hook(ctx->net, basechain, nla, &hook, ctx->family, flags, extack); if (err < 0) return err; if (basechain->type != hook.type) { nft_chain_release_hook(&hook); NL_SET_BAD_ATTR(extack, attr); return -EEXIST; } if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) { list_for_each_entry_safe(h, next, &hook.list, list) { list_for_each_entry(ops, &h->ops_list, list) { ops->pf = basechain->ops.pf; ops->hooknum = basechain->ops.hooknum; ops->priority = basechain->ops.priority; ops->priv = basechain->ops.priv; ops->hook = basechain->ops.hook; } if (nft_hook_list_find(&basechain->hook_list, h)) { list_del(&h->list); nft_netdev_hook_free(h); } } } else { ops = &basechain->ops; if (ops->hooknum != hook.num || ops->priority != hook.priority) { nft_chain_release_hook(&hook); NL_SET_BAD_ATTR(extack, attr); return -EEXIST; } } } if (nla[NFTA_CHAIN_HANDLE] && nla[NFTA_CHAIN_NAME]) { struct nft_chain *chain2; chain2 = nft_chain_lookup(ctx->net, table, nla[NFTA_CHAIN_NAME], genmask); if (!IS_ERR(chain2)) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]); err = -EEXIST; goto err_hooks; } } if (table->flags & __NFT_TABLE_F_UPDATE && !list_empty(&hook.list)) { NL_SET_BAD_ATTR(extack, attr); err = -EOPNOTSUPP; goto err_hooks; } if (!(table->flags & NFT_TABLE_F_DORMANT) && nft_is_base_chain(chain) && !list_empty(&hook.list)) { basechain = nft_base_chain(chain); ops = &basechain->ops; if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { err = nft_netdev_register_hooks(ctx->net, &hook.list); if (err < 0) goto err_hooks; unregister = true; } } if (nla[NFTA_CHAIN_COUNTERS]) { if (!nft_is_base_chain(chain)) { err = -EOPNOTSUPP; goto err_hooks; } stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); if (IS_ERR_PCPU(stats)) { err = PTR_ERR_PCPU(stats); goto err_hooks; } } err = -ENOMEM; trans = nft_trans_alloc_chain(ctx, NFT_MSG_NEWCHAIN); if (trans == NULL) goto err_trans; nft_trans_chain_stats(trans) = stats; nft_trans_chain_update(trans) = true; if (nla[NFTA_CHAIN_POLICY]) nft_trans_chain_policy(trans) = policy; else nft_trans_chain_policy(trans) = -1; if (nla[NFTA_CHAIN_HANDLE] && nla[NFTA_CHAIN_NAME]) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); struct nft_trans *tmp; char *name; err = -ENOMEM; name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT); if (!name) goto err_trans; err = -EEXIST; list_for_each_entry(tmp, &nft_net->commit_list, list) { if (tmp->msg_type == NFT_MSG_NEWCHAIN && tmp->table == table && nft_trans_chain_update(tmp) && nft_trans_chain_name(tmp) && 
strcmp(name, nft_trans_chain_name(tmp)) == 0) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]); kfree(name); goto err_trans; } } nft_trans_chain_name(trans) = name; } nft_trans_basechain(trans) = basechain; INIT_LIST_HEAD(&nft_trans_chain_hooks(trans)); list_splice(&hook.list, &nft_trans_chain_hooks(trans)); if (nla[NFTA_CHAIN_HOOK]) module_put(hook.type->owner); nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err_trans: free_percpu(stats); kfree(trans); err_hooks: if (nla[NFTA_CHAIN_HOOK]) { list_for_each_entry_safe(h, next, &hook.list, list) { if (unregister) { list_for_each_entry(ops, &h->ops_list, list) nf_unregister_net_hook(ctx->net, ops); } list_del(&h->list); nft_netdev_hook_free_rcu(h); } module_put(hook.type->owner); } return err; } static struct nft_chain *nft_chain_lookup_byid(const struct net *net, const struct nft_table *table, const struct nlattr *nla, u8 genmask) { struct nftables_pernet *nft_net = nft_pernet(net); u32 id = ntohl(nla_get_be32(nla)); struct nft_trans *trans; list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWCHAIN && nft_trans_chain(trans)->table == table && id == nft_trans_chain_id(trans) && nft_active_genmask(nft_trans_chain(trans), genmask)) return nft_trans_chain(trans); } return ERR_PTR(-ENOENT); } static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct nftables_pernet *nft_net = nft_pernet(info->net); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; struct nft_chain *chain = NULL; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; u8 policy = NF_ACCEPT; struct nft_ctx ctx; u64 handle = 0; u32 flags = 0; lockdep_assert_held(&nft_net->commit_mutex); table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask, NETLINK_CB(skb).portid); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]); return PTR_ERR(table); } chain = NULL; attr = nla[NFTA_CHAIN_NAME]; if (nla[NFTA_CHAIN_HANDLE]) { handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); chain = nft_chain_lookup_byhandle(table, handle, genmask); if (IS_ERR(chain)) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_HANDLE]); return PTR_ERR(chain); } attr = nla[NFTA_CHAIN_HANDLE]; } else if (nla[NFTA_CHAIN_NAME]) { chain = nft_chain_lookup(net, table, attr, genmask); if (IS_ERR(chain)) { if (PTR_ERR(chain) != -ENOENT) { NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(chain); } chain = NULL; } } else if (!nla[NFTA_CHAIN_ID]) { return -EINVAL; } if (nla[NFTA_CHAIN_POLICY]) { if (chain != NULL && !nft_is_base_chain(chain)) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]); return -EOPNOTSUPP; } if (chain == NULL && nla[NFTA_CHAIN_HOOK] == NULL) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]); return -EOPNOTSUPP; } policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY])); switch (policy) { case NF_DROP: case NF_ACCEPT: break; default: return -EINVAL; } } if (nla[NFTA_CHAIN_FLAGS]) flags = ntohl(nla_get_be32(nla[NFTA_CHAIN_FLAGS])); else if (chain) flags = chain->flags; if (flags & ~NFT_CHAIN_FLAGS) return -EOPNOTSUPP; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); if (chain != NULL) { if (chain->flags & NFT_CHAIN_BINDING) return -EINVAL; if (info->nlh->nlmsg_flags & NLM_F_EXCL) { NL_SET_BAD_ATTR(extack, attr); return -EEXIST; } if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; flags |= chain->flags & NFT_CHAIN_BASE; return 
nf_tables_updchain(&ctx, genmask, policy, flags, attr, extack); } return nf_tables_addchain(&ctx, family, policy, flags, extack); } static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_base_chain *basechain, struct netlink_ext_ack *extack) { const struct nft_chain *chain = &basechain->chain; const struct nlattr * const *nla = ctx->nla; struct nft_chain_hook chain_hook = {}; struct nft_hook *this, *hook; LIST_HEAD(chain_del_list); struct nft_trans *trans; int err; if (ctx->table->flags & __NFT_TABLE_F_UPDATE) return -EOPNOTSUPP; err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook, ctx->family, chain->flags, extack); if (err < 0) return err; list_for_each_entry(this, &chain_hook.list, list) { hook = nft_hook_list_find(&basechain->hook_list, this); if (!hook) { err = -ENOENT; goto err_chain_del_hook; } list_move(&hook->list, &chain_del_list); } trans = nft_trans_alloc_chain(ctx, NFT_MSG_DELCHAIN); if (!trans) { err = -ENOMEM; goto err_chain_del_hook; } nft_trans_basechain(trans) = basechain; nft_trans_chain_update(trans) = true; INIT_LIST_HEAD(&nft_trans_chain_hooks(trans)); list_splice(&chain_del_list, &nft_trans_chain_hooks(trans)); nft_chain_release_hook(&chain_hook); nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err_chain_del_hook: list_splice(&chain_del_list, &basechain->hook_list); nft_chain_release_hook(&chain_hook); return err; } static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule; struct nft_ctx ctx; u64 handle; u32 use; int err; table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask, NETLINK_CB(skb).portid); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]); return PTR_ERR(table); } if (nla[NFTA_CHAIN_HANDLE]) { attr = nla[NFTA_CHAIN_HANDLE]; handle = be64_to_cpu(nla_get_be64(attr)); chain = nft_chain_lookup_byhandle(table, handle, genmask); } else { attr = nla[NFTA_CHAIN_NAME]; chain = nft_chain_lookup(net, table, attr, genmask); } if (IS_ERR(chain)) { if (PTR_ERR(chain) == -ENOENT && NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN) return 0; NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(chain); } if (nft_chain_binding(chain)) return -EOPNOTSUPP; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); if (nla[NFTA_CHAIN_HOOK]) { if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN || chain->flags & NFT_CHAIN_HW_OFFLOAD) return -EOPNOTSUPP; if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain); if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) return nft_delchain_hook(&ctx, basechain, extack); } } if (info->nlh->nlmsg_flags & NLM_F_NONREC && chain->use > 0) return -EBUSY; use = chain->use; list_for_each_entry(rule, &chain->rules, list) { if (!nft_is_active_next(net, rule)) continue; use--; err = nft_delrule(&ctx, rule); if (err < 0) return err; } /* There are rules and elements that are still holding references to us, * we cannot do a recursive removal in this case. */ if (use > 0) { NL_SET_BAD_ATTR(extack, attr); return -EBUSY; } return nft_delchain(&ctx); } /* * Expressions */ /** * nft_register_expr - register nf_tables expr type * @type: expr type * * Registers the expr type for use with nf_tables. 
Returns zero on * success or a negative errno code otherwise. */ int nft_register_expr(struct nft_expr_type *type) { if (WARN_ON_ONCE(type->maxattr > NFT_EXPR_MAXATTR)) return -ENOMEM; nfnl_lock(NFNL_SUBSYS_NFTABLES); if (type->family == NFPROTO_UNSPEC) list_add_tail_rcu(&type->list, &nf_tables_expressions); else list_add_rcu(&type->list, &nf_tables_expressions); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } EXPORT_SYMBOL_GPL(nft_register_expr); /** * nft_unregister_expr - unregister nf_tables expr type * @type: expr type * * Unregisters the expr type for use with nf_tables. */ void nft_unregister_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); list_del_rcu(&type->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_expr); static const struct nft_expr_type *__nft_expr_type_get(u8 family, struct nlattr *nla) { const struct nft_expr_type *type, *candidate = NULL; list_for_each_entry_rcu(type, &nf_tables_expressions, list) { if (!nla_strcmp(nla, type->name)) { if (!type->family && !candidate) candidate = type; else if (type->family == family) candidate = type; } } return candidate; } #ifdef CONFIG_MODULES static int nft_expr_type_request_module(struct net *net, u8 family, struct nlattr *nla) { if (nft_request_module(net, "nft-expr-%u-%.*s", family, nla_len(nla), (char *)nla_data(nla)) == -EAGAIN) return -EAGAIN; return 0; } #endif static const struct nft_expr_type *nft_expr_type_get(struct net *net, u8 family, struct nlattr *nla) { const struct nft_expr_type *type; if (nla == NULL) return ERR_PTR(-EINVAL); rcu_read_lock(); type = __nft_expr_type_get(family, nla); if (type != NULL && try_module_get(type->owner)) { rcu_read_unlock(); return type; } rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES if (type == NULL) { if (nft_expr_type_request_module(net, family, nla) == -EAGAIN) return ERR_PTR(-EAGAIN); if (nft_request_module(net, "nft-expr-%.*s", nla_len(nla), (char *)nla_data(nla)) == -EAGAIN) return ERR_PTR(-EAGAIN); } #endif return ERR_PTR(-ENOENT); } static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = { [NFTA_EXPR_NAME] = { .type = NLA_STRING, .len = NFT_MODULE_AUTOLOAD_LIMIT }, [NFTA_EXPR_DATA] = { .type = NLA_NESTED }, }; static int nf_tables_fill_expr_info(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name)) goto nla_put_failure; if (expr->ops->dump) { struct nlattr *data = nla_nest_start_noflag(skb, NFTA_EXPR_DATA); if (data == NULL) goto nla_put_failure; if (expr->ops->dump(skb, expr, reset) < 0) goto nla_put_failure; nla_nest_end(skb, data); } return skb->len; nla_put_failure: return -1; } int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr, bool reset) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, attr); if (!nest) goto nla_put_failure; if (nf_tables_fill_expr_info(skb, expr, reset) < 0) goto nla_put_failure; nla_nest_end(skb, nest); return 0; nla_put_failure: return -1; } struct nft_expr_info { const struct nft_expr_ops *ops; const struct nlattr *attr; struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; }; static int nf_tables_expr_parse(const struct nft_ctx *ctx, const struct nlattr *nla, struct nft_expr_info *info) { const struct nft_expr_type *type; const struct nft_expr_ops *ops; struct nlattr *tb[NFTA_EXPR_MAX + 1]; int err; err = nla_parse_nested_deprecated(tb, NFTA_EXPR_MAX, nla, nft_expr_policy, NULL); if (err < 0) return err; type = nft_expr_type_get(ctx->net, ctx->family,
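/* resolve the expression type by name (possibly auto-loading an nft-expr module), then parse NFTA_EXPR_DATA against that type's attribute policy below */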
tb[NFTA_EXPR_NAME]); if (IS_ERR(type)) return PTR_ERR(type); if (tb[NFTA_EXPR_DATA]) { err = nla_parse_nested_deprecated(info->tb, type->maxattr, tb[NFTA_EXPR_DATA], type->policy, NULL); if (err < 0) goto err1; } else memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1)); if (type->select_ops != NULL) { ops = type->select_ops(ctx, (const struct nlattr * const *)info->tb); if (IS_ERR(ops)) { err = PTR_ERR(ops); #ifdef CONFIG_MODULES if (err == -EAGAIN) if (nft_expr_type_request_module(ctx->net, ctx->family, tb[NFTA_EXPR_NAME]) != -EAGAIN) err = -ENOENT; #endif goto err1; } } else ops = type->ops; info->attr = nla; info->ops = ops; return 0; err1: module_put(type->owner); return err; } int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla, struct nft_expr_info *info) { struct nlattr *tb[NFTA_EXPR_MAX + 1]; const struct nft_expr_type *type; int err; err = nla_parse_nested_deprecated(tb, NFTA_EXPR_MAX, nla, nft_expr_policy, NULL); if (err < 0) return err; if (!tb[NFTA_EXPR_DATA] || !tb[NFTA_EXPR_NAME]) return -EINVAL; rcu_read_lock(); type = __nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]); if (!type) { err = -ENOENT; goto out_unlock; } if (!type->inner_ops) { err = -EOPNOTSUPP; goto out_unlock; } err = nla_parse_nested_deprecated(info->tb, type->maxattr, tb[NFTA_EXPR_DATA], type->policy, NULL); if (err < 0) goto out_unlock; info->attr = nla; info->ops = type->inner_ops; /* No module reference will be taken on type->owner. * Presence of type->inner_ops implies that the expression * is builtin, so it cannot go away. */ rcu_read_unlock(); return 0; out_unlock: rcu_read_unlock(); return err; } static int nf_tables_newexpr(const struct nft_ctx *ctx, const struct nft_expr_info *expr_info, struct nft_expr *expr) { const struct nft_expr_ops *ops = expr_info->ops; int err; expr->ops = ops; if (ops->init) { err = ops->init(ctx, expr, (const struct nlattr **)expr_info->tb); if (err < 0) goto err1; } return 0; err1: expr->ops = NULL; return err; } static void nf_tables_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr) { const struct nft_expr_type *type = expr->ops->type; if (expr->ops->destroy) expr->ops->destroy(ctx, expr); module_put(type->owner); } static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, const struct nlattr *nla) { struct nft_expr_info expr_info; struct nft_expr *expr; struct module *owner; int err; err = nf_tables_expr_parse(ctx, nla, &expr_info); if (err < 0) goto err_expr_parse; err = -EOPNOTSUPP; if (!(expr_info.ops->type->flags & NFT_EXPR_STATEFUL)) goto err_expr_stateful; err = -ENOMEM; expr = kzalloc(expr_info.ops->size, GFP_KERNEL_ACCOUNT); if (expr == NULL) goto err_expr_stateful; err = nf_tables_newexpr(ctx, &expr_info, expr); if (err < 0) goto err_expr_new; return expr; err_expr_new: kfree(expr); err_expr_stateful: owner = expr_info.ops->type->owner; if (expr_info.ops->type->release_ops) expr_info.ops->type->release_ops(expr_info.ops); module_put(owner); err_expr_parse: return ERR_PTR(err); } int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp) { int err; if (WARN_ON_ONCE(!src->ops->clone)) return -EINVAL; dst->ops = src->ops; err = src->ops->clone(dst, src, gfp); if (err < 0) return err; __module_get(src->ops->type->owner); return 0; } void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr) { nf_tables_expr_destroy(ctx, expr); kfree(expr); } /* * Rules */ static struct nft_rule *__nft_rule_lookup(const struct net *net, const struct nft_chain *chain, u64 handle) { struct nft_rule 
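/* rule handles are resolved with a linear walk over the chain's rule list, hence the FIXME */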
*rule; // FIXME: this sucks list_for_each_entry_rcu(rule, &chain->rules, list, lockdep_commit_lock_is_held(net)) { if (handle == rule->handle) return rule; } return ERR_PTR(-ENOENT); } static struct nft_rule *nft_rule_lookup(const struct net *net, const struct nft_chain *chain, const struct nlattr *nla) { if (nla == NULL) return ERR_PTR(-EINVAL); return __nft_rule_lookup(net, chain, be64_to_cpu(nla_get_be64(nla))); } static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { [NFTA_RULE_TABLE] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_RULE_CHAIN] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, [NFTA_RULE_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy), [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, [NFTA_RULE_POSITION] = { .type = NLA_U64 }, [NFTA_RULE_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, [NFTA_RULE_ID] = { .type = NLA_U32 }, [NFTA_RULE_POSITION_ID] = { .type = NLA_U32 }, [NFTA_RULE_CHAIN_ID] = { .type = NLA_U32 }, }; static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, const struct nft_table *table, const struct nft_chain *chain, const struct nft_rule *rule, u64 handle, bool reset) { struct nlmsghdr *nlh; const struct nft_expr *expr, *next; struct nlattr *list; u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; if (nla_put_string(skb, NFTA_RULE_TABLE, table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name)) goto nla_put_failure; if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle), NFTA_RULE_PAD)) goto nla_put_failure; if (event != NFT_MSG_DELRULE && handle) { if (nla_put_be64(skb, NFTA_RULE_POSITION, cpu_to_be64(handle), NFTA_RULE_PAD)) goto nla_put_failure; } if (chain->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_stats(chain, rule); list = nla_nest_start_noflag(skb, NFTA_RULE_EXPRESSIONS); if (list == NULL) goto nla_put_failure; nft_rule_for_each_expr(expr, next, rule) { if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr, reset) < 0) goto nla_put_failure; } nla_nest_end(skb, list); if (rule->udata) { struct nft_userdata *udata = nft_userdata(rule); if (nla_put(skb, NFTA_RULE_USERDATA, udata->len + 1, udata->data) < 0) goto nla_put_failure; } nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_trim(skb, nlh); return -1; } static void nf_tables_rule_notify(const struct nft_ctx *ctx, const struct nft_rule *rule, int event) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); const struct nft_rule *prule; struct sk_buff *skb; u64 handle = 0; u16 flags = 0; int err; if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; if (event == NFT_MSG_NEWRULE && !list_is_first(&rule->list, &ctx->chain->rules) && !list_is_last(&rule->list, &ctx->chain->rules)) { prule = list_prev_entry(rule, list); handle = prule->handle; } if (ctx->flags & (NLM_F_APPEND | NLM_F_REPLACE)) flags |= NLM_F_APPEND; if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, ctx->family, ctx->table, ctx->chain, rule, handle, false); if (err < 0) { kfree_skb(skb); goto err; } nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: 
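/* allocating or filling the notification skb failed: report -ENOBUFS to the NFNLGRP_NFTABLES listeners */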
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } static void audit_log_rule_reset(const struct nft_table *table, unsigned int base_seq, unsigned int nentries) { char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq); audit_log_nfcfg(buf, table->family, nentries, AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC); kfree(buf); } struct nft_rule_dump_ctx { unsigned int s_idx; char *table; char *chain; bool reset; }; static int __nf_tables_dump_rules(struct sk_buff *skb, unsigned int *idx, struct netlink_callback *cb, const struct nft_table *table, const struct nft_chain *chain) { struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); const struct nft_rule *rule, *prule; unsigned int entries = 0; int ret = 0; u64 handle; prule = NULL; list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_is_active(net, rule)) goto cont_skip; if (*idx < ctx->s_idx) goto cont; if (prule) handle = prule->handle; else handle = 0; if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWRULE, NLM_F_MULTI | NLM_F_APPEND, table->family, table, chain, rule, handle, ctx->reset) < 0) { ret = 1; break; } entries++; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: prule = rule; cont_skip: (*idx)++; } if (ctx->reset && entries) audit_log_rule_reset(table, cb->seq, entries); return ret; } static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; struct nft_table *table; const struct nft_chain *chain; unsigned int idx = 0; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nftables_pernet *nft_net; rcu_read_lock(); nft_net = nft_pernet(net); cb->seq = READ_ONCE(nft_net->base_seq); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; if (ctx->table && strcmp(ctx->table, table->name) != 0) continue; if (ctx->table && ctx->chain) { struct rhlist_head *list, *tmp; list = rhltable_lookup(&table->chains_ht, ctx->chain, nft_chain_ht_params); if (!list) goto done; rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) { if (!nft_is_active(net, chain)) continue; __nf_tables_dump_rules(skb, &idx, cb, table, chain); break; } goto done; } list_for_each_entry_rcu(chain, &table->chains, list) { if (__nf_tables_dump_rules(skb, &idx, cb, table, chain)) goto done; } if (ctx->table) break; } done: rcu_read_unlock(); ctx->s_idx = idx; return skb->len; } static int nf_tables_dumpreset_rules(struct sk_buff *skb, struct netlink_callback *cb) { struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); int ret; /* The mutex is held to prevent two concurrent dump-and-reset calls * from underrunning counters and quotas. The commit_mutex is used * for lack of a better lock; this is not the transaction path.
*/ mutex_lock(&nft_net->commit_mutex); ret = nf_tables_dump_rules(skb, cb); mutex_unlock(&nft_net->commit_mutex); return ret; } static int nf_tables_dump_rules_start(struct netlink_callback *cb) { struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; const struct nlattr * const *nla = cb->data; BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); if (nla[NFTA_RULE_TABLE]) { ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC); if (!ctx->table) return -ENOMEM; } if (nla[NFTA_RULE_CHAIN]) { ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], GFP_ATOMIC); if (!ctx->chain) { kfree(ctx->table); return -ENOMEM; } } return 0; } static int nf_tables_dumpreset_rules_start(struct netlink_callback *cb) { struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; ctx->reset = true; return nf_tables_dump_rules_start(cb); } static int nf_tables_dump_rules_done(struct netlink_callback *cb) { struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; kfree(ctx->table); kfree(ctx->chain); return 0; } /* Caller must hold rcu read lock or transaction mutex */ static struct sk_buff * nf_tables_getrule_single(u32 portid, const struct nfnl_info *info, const struct nlattr * const nla[], bool reset) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; const struct nft_chain *chain; const struct nft_rule *rule; struct net *net = info->net; struct nft_table *table; struct sk_buff *skb2; int err; table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]); return ERR_CAST(table); } chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask); if (IS_ERR(chain)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return ERR_CAST(chain); } rule = nft_rule_lookup(net, chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]); return ERR_CAST(rule); } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) return ERR_PTR(-ENOMEM); err = nf_tables_fill_rule_info(skb2, net, portid, info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, family, table, chain, rule, 0, reset); if (err < 0) { kfree_skb(skb2); return ERR_PTR(err); } return skb2; } static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { u32 portid = NETLINK_CB(skb).portid; struct net *net = info->net; struct sk_buff *skb2; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start= nf_tables_dump_rules_start, .dump = nf_tables_dump_rules, .done = nf_tables_dump_rules_done, .module = THIS_MODULE, .data = (void *)nla, }; return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } skb2 = nf_tables_getrule_single(portid, info, nla, false); if (IS_ERR(skb2)) return PTR_ERR(skb2); return nfnetlink_unicast(skb2, net, portid); } static int nf_tables_getrule_reset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct nftables_pernet *nft_net = nft_pernet(info->net); u32 portid = NETLINK_CB(skb).portid; struct net *net = info->net; struct sk_buff *skb2; char *buf; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start= nf_tables_dumpreset_rules_start, .dump = nf_tables_dumpreset_rules, .done = nf_tables_dump_rules_done, .module = THIS_MODULE, .data = (void *)nla, }; return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } if (!try_module_get(THIS_MODULE)) return -EINVAL; rcu_read_unlock(); mutex_lock(&nft_net->commit_mutex); skb2 = 
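/* the reset variant runs outside the rcu read side and under commit_mutex so that concurrent dump-and-reset calls cannot race on counters and quotas; the reset is also logged for audit below */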
nf_tables_getrule_single(portid, info, nla, true); mutex_unlock(&nft_net->commit_mutex); rcu_read_lock(); module_put(THIS_MODULE); if (IS_ERR(skb2)) return PTR_ERR(skb2); buf = kasprintf(GFP_ATOMIC, "%.*s:%u", nla_len(nla[NFTA_RULE_TABLE]), (char *)nla_data(nla[NFTA_RULE_TABLE]), nft_net->base_seq); audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC); kfree(buf); return nfnetlink_unicast(skb2, net, portid); } void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) { struct nft_expr *expr, *next; /* * Careful: some expressions might not be initialized in case this * is called on error from nf_tables_newrule(). */ expr = nft_expr_first(rule); while (nft_expr_more(rule, expr)) { next = nft_expr_next(expr); nf_tables_expr_destroy(ctx, expr); expr = next; } kfree(rule); } /* can only be used if rule is no longer visible to dumps */ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { lockdep_commit_lock_is_held(ctx->net); nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); } /** nft_chain_validate - loop detection and hook validation * * @ctx: context containing call depth and base chain * @chain: chain to validate * * Walk through the rules of the given chain and chase all jumps/gotos * and set lookups until either the jump limit is hit or all reachable * chains have been validated. */ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) { struct nft_expr *expr, *last; struct nft_rule *rule; int err; if (ctx->level == NFT_JUMP_STACK_SIZE) return -EMLINK; list_for_each_entry(rule, &chain->rules, list) { if (fatal_signal_pending(current)) return -EINTR; if (!nft_is_active_next(ctx->net, rule)) continue; nft_rule_for_each_expr(expr, last, rule) { if (!expr->ops->validate) continue; /* This may call nft_chain_validate() recursively, * callers that do so must increment ctx->level. 
*/ err = expr->ops->validate(ctx, expr); if (err < 0) return err; } } return 0; } EXPORT_SYMBOL_GPL(nft_chain_validate); static int nft_table_validate(struct net *net, const struct nft_table *table) { struct nft_chain *chain; struct nft_ctx ctx = { .net = net, .family = table->family, }; int err; list_for_each_entry(chain, &table->chains, list) { if (!nft_is_base_chain(chain)) continue; ctx.chain = chain; err = nft_chain_validate(&ctx, chain); if (err < 0) return err; cond_resched(); } return 0; } int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); struct nft_ctx *pctx = (struct nft_ctx *)ctx; const struct nft_data *data; int err; if (!nft_set_elem_active(ext, iter->genmask)) return 0; if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) return 0; data = nft_set_ext_data(ext); switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: pctx->level++; err = nft_chain_validate(ctx, data->verdict.chain); if (err < 0) return err; pctx->level--; break; default: break; } return 0; } int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) { struct nft_set_iter dummy_iter = { .genmask = nft_genmask_next(ctx->net), }; struct nft_set_elem_catchall *catchall; struct nft_set_ext *ext; int ret = 0; list_for_each_entry_rcu(catchall, &set->catchall_list, list, lockdep_commit_lock_is_held(ctx->net)) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_active(ext, dummy_iter.genmask)) continue; ret = nft_setelem_validate(ctx, set, &dummy_iter, catchall->elem); if (ret < 0) return ret; } return ret; } static struct nft_rule *nft_rule_lookup_byid(const struct net *net, const struct nft_chain *chain, const struct nlattr *nla); #define NFT_RULE_MAXEXPRS 128 static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct nftables_pernet *nft_net = nft_pernet(info->net); struct netlink_ext_ack *extack = info->extack; unsigned int size, i, n, ulen = 0, usize = 0; u8 genmask = nft_genmask_next(info->net); struct nft_rule *rule, *old_rule = NULL; struct nft_expr_info *expr_info = NULL; u8 family = info->nfmsg->nfgen_family; struct nft_flow_rule *flow = NULL; struct net *net = info->net; struct nft_userdata *udata; struct nft_table *table; struct nft_chain *chain; struct nft_trans *trans; u64 handle, pos_handle; struct nft_expr *expr; struct nft_ctx ctx; struct nlattr *tmp; int err, rem; lockdep_assert_held(&nft_net->commit_mutex); table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, NETLINK_CB(skb).portid); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]); return PTR_ERR(table); } if (nla[NFTA_RULE_CHAIN]) { chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask); if (IS_ERR(chain)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); } } else if (nla[NFTA_RULE_CHAIN_ID]) { chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID], genmask); if (IS_ERR(chain)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]); return PTR_ERR(chain); } } else { return -EINVAL; } if (nft_chain_is_bound(chain)) return -EOPNOTSUPP; if (nla[NFTA_RULE_HANDLE]) { handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE])); rule = __nft_rule_lookup(net, chain, handle); if (IS_ERR(rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]); return PTR_ERR(rule); } if 
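/* NFTA_RULE_HANDLE named an existing rule: NLM_F_EXCL makes this an error, NLM_F_REPLACE selects in-place replacement, anything else is unsupported */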
(info->nlh->nlmsg_flags & NLM_F_EXCL) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]); return -EEXIST; } if (info->nlh->nlmsg_flags & NLM_F_REPLACE) old_rule = rule; else return -EOPNOTSUPP; } else { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE) || info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EINVAL; handle = nf_tables_alloc_handle(table); if (nla[NFTA_RULE_POSITION]) { pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); old_rule = __nft_rule_lookup(net, chain, pos_handle); if (IS_ERR(old_rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]); return PTR_ERR(old_rule); } } else if (nla[NFTA_RULE_POSITION_ID]) { old_rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_POSITION_ID]); if (IS_ERR(old_rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]); return PTR_ERR(old_rule); } } } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); n = 0; size = 0; if (nla[NFTA_RULE_EXPRESSIONS]) { expr_info = kvmalloc_array(NFT_RULE_MAXEXPRS, sizeof(struct nft_expr_info), GFP_KERNEL); if (!expr_info) return -ENOMEM; nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) { err = -EINVAL; if (nla_type(tmp) != NFTA_LIST_ELEM) goto err_release_expr; if (n == NFT_RULE_MAXEXPRS) goto err_release_expr; err = nf_tables_expr_parse(&ctx, tmp, &expr_info[n]); if (err < 0) { NL_SET_BAD_ATTR(extack, tmp); goto err_release_expr; } size += expr_info[n].ops->size; n++; } } /* Check for overflow of dlen field */ err = -EFBIG; if (size >= 1 << 12) goto err_release_expr; if (nla[NFTA_RULE_USERDATA]) { ulen = nla_len(nla[NFTA_RULE_USERDATA]); if (ulen > 0) usize = sizeof(struct nft_userdata) + ulen; } err = -ENOMEM; rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL_ACCOUNT); if (rule == NULL) goto err_release_expr; nft_activate_next(net, rule); rule->handle = handle; rule->dlen = size; rule->udata = ulen ? 
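/* rule->udata is just a flag; the real length is stored as udata->len = ulen - 1 below and re-biased by +1 when the rule is dumped */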
1 : 0; if (ulen) { udata = nft_userdata(rule); udata->len = ulen - 1; nla_memcpy(udata->data, nla[NFTA_RULE_USERDATA], ulen); } expr = nft_expr_first(rule); for (i = 0; i < n; i++) { err = nf_tables_newexpr(&ctx, &expr_info[i], expr); if (err < 0) { NL_SET_BAD_ATTR(extack, expr_info[i].attr); goto err_release_rule; } if (expr_info[i].ops->validate) nft_validate_state_update(table, NFT_VALIDATE_NEED); expr_info[i].ops = NULL; expr = nft_expr_next(expr); } if (chain->flags & NFT_CHAIN_HW_OFFLOAD) { flow = nft_flow_rule_create(net, rule); if (IS_ERR(flow)) { err = PTR_ERR(flow); goto err_release_rule; } } if (!nft_use_inc(&chain->use)) { err = -EMFILE; goto err_release_rule; } if (info->nlh->nlmsg_flags & NLM_F_REPLACE) { if (nft_chain_binding(chain)) { err = -EOPNOTSUPP; goto err_destroy_flow_rule; } err = nft_delrule(&ctx, old_rule); if (err < 0) goto err_destroy_flow_rule; trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (trans == NULL) { err = -ENOMEM; goto err_destroy_flow_rule; } list_add_tail_rcu(&rule->list, &old_rule->list); } else { trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (!trans) { err = -ENOMEM; goto err_destroy_flow_rule; } if (info->nlh->nlmsg_flags & NLM_F_APPEND) { if (old_rule) list_add_rcu(&rule->list, &old_rule->list); else list_add_tail_rcu(&rule->list, &chain->rules); } else { if (old_rule) list_add_tail_rcu(&rule->list, &old_rule->list); else list_add_rcu(&rule->list, &chain->rules); } } kvfree(expr_info); if (flow) nft_trans_flow_rule(trans) = flow; if (table->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, table); return 0; err_destroy_flow_rule: nft_use_dec_restore(&chain->use); if (flow) nft_flow_rule_destroy(flow); err_release_rule: nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR); nf_tables_rule_destroy(&ctx, rule); err_release_expr: for (i = 0; i < n; i++) { if (expr_info[i].ops) { module_put(expr_info[i].ops->type->owner); if (expr_info[i].ops->type->release_ops) expr_info[i].ops->type->release_ops(expr_info[i].ops); } } kvfree(expr_info); return err; } static struct nft_rule *nft_rule_lookup_byid(const struct net *net, const struct nft_chain *chain, const struct nlattr *nla) { struct nftables_pernet *nft_net = nft_pernet(net); u32 id = ntohl(nla_get_be32(nla)); struct nft_trans *trans; list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWRULE && nft_trans_rule_chain(trans) == chain && id == nft_trans_rule_id(trans)) return nft_trans_rule(trans); } return ERR_PTR(-ENOENT); } static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; struct nft_chain *chain = NULL; struct net *net = info->net; struct nft_table *table; struct nft_rule *rule; struct nft_ctx ctx; int err = 0; table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, NETLINK_CB(skb).portid); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]); return PTR_ERR(table); } if (nla[NFTA_RULE_CHAIN]) { chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask); if (IS_ERR(chain)) { if (PTR_ERR(chain) == -ENOENT && NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYRULE) return 0; NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); } if (nft_chain_binding(chain)) return -EOPNOTSUPP; } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); if (chain) { if 
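/* with a chain given, delete a single rule by handle or by transaction id, or flush the chain if neither attribute is present; without a chain, every non-bound chain in the table is flushed below */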
(nla[NFTA_RULE_HANDLE]) { rule = nft_rule_lookup(info->net, chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) { if (PTR_ERR(rule) == -ENOENT && NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYRULE) return 0; NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]); return PTR_ERR(rule); } err = nft_delrule(&ctx, rule); } else if (nla[NFTA_RULE_ID]) { rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_ID]); if (IS_ERR(rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]); return PTR_ERR(rule); } err = nft_delrule(&ctx, rule); } else { err = nft_delrule_by_chain(&ctx); } } else { list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) continue; if (nft_chain_binding(chain)) continue; ctx.chain = chain; err = nft_delrule_by_chain(&ctx); if (err < 0) break; } } return err; } /* * Sets */ static const struct nft_set_type *nft_set_types[] = { &nft_set_hash_fast_type, &nft_set_hash_type, &nft_set_rhash_type, &nft_set_bitmap_type, &nft_set_rbtree_type, #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) &nft_set_pipapo_avx2_type, #endif &nft_set_pipapo_type, }; #define NFT_SET_FEATURES (NFT_SET_INTERVAL | NFT_SET_MAP | \ NFT_SET_TIMEOUT | NFT_SET_OBJECT | \ NFT_SET_EVAL) static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags) { return (flags & type->features) == (flags & NFT_SET_FEATURES); } /* * Select a set implementation based on the data characteristics and the * given policy. The total memory use might not be known if no size is * given, in that case the amount of memory per element is used. */ static const struct nft_set_ops * nft_select_set_ops(const struct nft_ctx *ctx, u32 flags, const struct nft_set_desc *desc) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); const struct nft_set_ops *ops, *bops; struct nft_set_estimate est, best; const struct nft_set_type *type; int i; lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); bops = NULL; best.size = ~0; best.lookup = ~0; best.space = ~0; for (i = 0; i < ARRAY_SIZE(nft_set_types); i++) { type = nft_set_types[i]; ops = &type->ops; if (!nft_set_ops_candidate(type, flags)) continue; if (!ops->estimate(desc, flags, &est)) continue; switch (desc->policy) { case NFT_SET_POL_PERFORMANCE: if (est.lookup < best.lookup) break; if (est.lookup == best.lookup && est.space < best.space) break; continue; case NFT_SET_POL_MEMORY: if (!desc->size) { if (est.space < best.space) break; if (est.space == best.space && est.lookup < best.lookup) break; } else if (est.size < best.size || !bops) { break; } continue; default: break; } bops = ops; best = est; } if (bops != NULL) return bops; return ERR_PTR(-EOPNOTSUPP); } static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_TABLE] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_SET_NAME] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_SET_FLAGS] = { .type = NLA_U32 }, [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 }, [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 }, [NFTA_SET_DATA_LEN] = { .type = NLA_U32 }, [NFTA_SET_POLICY] = { .type = NLA_U32 }, [NFTA_SET_DESC] = { .type = NLA_NESTED }, [NFTA_SET_ID] = { .type = NLA_U32 }, [NFTA_SET_TIMEOUT] = { .type = NLA_U64 }, [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 }, [NFTA_SET_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, [NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 }, [NFTA_SET_HANDLE] = { .type = NLA_U64 }, [NFTA_SET_EXPR] = { .type = NLA_NESTED }, [NFTA_SET_EXPRESSIONS] = 
NLA_POLICY_NESTED_ARRAY(nft_expr_policy), [NFTA_SET_TYPE] = { .type = NLA_REJECT }, [NFTA_SET_COUNT] = { .type = NLA_REJECT }, }; static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = { [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 }, [NFTA_SET_DESC_CONCAT] = NLA_POLICY_NESTED_ARRAY(nft_concat_policy), }; static struct nft_set *nft_set_lookup(const struct net *net, const struct nft_table *table, const struct nlattr *nla, u8 genmask) { struct nft_set *set; if (nla == NULL) return ERR_PTR(-EINVAL); list_for_each_entry_rcu(set, &table->sets, list, lockdep_commit_lock_is_held(net)) { if (!nla_strcmp(nla, set->name) && nft_active_genmask(set, genmask)) return set; } return ERR_PTR(-ENOENT); } static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table, const struct nlattr *nla, u8 genmask) { struct nft_set *set; list_for_each_entry(set, &table->sets, list) { if (be64_to_cpu(nla_get_be64(nla)) == set->handle && nft_active_genmask(set, genmask)) return set; } return ERR_PTR(-ENOENT); } static struct nft_set *nft_set_lookup_byid(const struct net *net, const struct nft_table *table, const struct nlattr *nla, u8 genmask) { struct nftables_pernet *nft_net = nft_pernet(net); u32 id = ntohl(nla_get_be32(nla)); struct nft_trans_set *trans; /* its likely the id we need is at the tail, not at start */ list_for_each_entry_reverse(trans, &nft_net->commit_set_list, list_trans_newset) { struct nft_set *set = trans->set; if (id == trans->set_id && set->table == table && nft_active_genmask(set, genmask)) return set; } return ERR_PTR(-ENOENT); } struct nft_set *nft_set_lookup_global(const struct net *net, const struct nft_table *table, const struct nlattr *nla_set_name, const struct nlattr *nla_set_id, u8 genmask) { struct nft_set *set; set = nft_set_lookup(net, table, nla_set_name, genmask); if (IS_ERR(set)) { if (!nla_set_id) return set; set = nft_set_lookup_byid(net, table, nla_set_id, genmask); } return set; } EXPORT_SYMBOL_GPL(nft_set_lookup_global); static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, const char *name) { const struct nft_set *i; const char *p; unsigned long *inuse; unsigned int n = 0, min = 0; p = strchr(name, '%'); if (p != NULL) { if (p[1] != 'd' || strchr(p + 2, '%')) return -EINVAL; if (strnlen(name, NFT_SET_MAX_ANONLEN) >= NFT_SET_MAX_ANONLEN) return -EINVAL; inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL); if (inuse == NULL) return -ENOMEM; cont: list_for_each_entry(i, &ctx->table->sets, list) { int tmp; if (!nft_is_active_next(ctx->net, i)) continue; if (!sscanf(i->name, name, &tmp)) continue; if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE) continue; set_bit(tmp - min, inuse); } n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE); if (n >= BITS_PER_BYTE * PAGE_SIZE) { min += BITS_PER_BYTE * PAGE_SIZE; memset(inuse, 0, PAGE_SIZE); goto cont; } free_page((unsigned long)inuse); } set->name = kasprintf(GFP_KERNEL_ACCOUNT, name, min + n); if (!set->name) return -ENOMEM; list_for_each_entry(i, &ctx->table->sets, list) { if (!nft_is_active_next(ctx->net, i)) continue; if (!strcmp(set->name, i->name)) { kfree(set->name); set->name = NULL; return -ENFILE; } } return 0; } int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) { u64 ms = be64_to_cpu(nla_get_be64(nla)); u64 max = (u64)(~((u64)0)); max = div_u64(max, NSEC_PER_MSEC); if (ms >= max) return -ERANGE; ms *= NSEC_PER_MSEC; 
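/* after converting the millisecond timeout to nanoseconds, the ?: !!ms below keeps a small nonzero timeout from being rounded down to zero jiffies */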
*result = nsecs_to_jiffies64(ms) ? : !!ms; return 0; } __be64 nf_jiffies64_to_msecs(u64 input) { return cpu_to_be64(jiffies64_to_msecs(input)); } static int nf_tables_fill_set_concat(struct sk_buff *skb, const struct nft_set *set) { struct nlattr *concat, *field; int i; concat = nla_nest_start_noflag(skb, NFTA_SET_DESC_CONCAT); if (!concat) return -ENOMEM; for (i = 0; i < set->field_count; i++) { field = nla_nest_start_noflag(skb, NFTA_LIST_ELEM); if (!field) return -ENOMEM; if (nla_put_be32(skb, NFTA_SET_FIELD_LEN, htonl(set->field_len[i]))) return -ENOMEM; nla_nest_end(skb, field); } nla_nest_end(skb, concat); return 0; } static u32 nft_set_userspace_size(const struct nft_set_ops *ops, u32 size) { if (ops->usize) return ops->usize(size); return size; } static noinline_for_stack int nf_tables_fill_set_info(struct sk_buff *skb, const struct nft_set *set) { unsigned int nelems; char str[40]; int ret; ret = snprintf(str, sizeof(str), "%ps", set->ops); /* Not expected to happen and harmless: NFTA_SET_TYPE is dumped * to userspace purely for informational/debug purposes. */ DEBUG_NET_WARN_ON_ONCE(ret >= sizeof(str)); if (nla_put_string(skb, NFTA_SET_TYPE, str)) return -EMSGSIZE; nelems = nft_set_userspace_size(set->ops, atomic_read(&set->nelems)); return nla_put_be32(skb, NFTA_SET_COUNT, htonl(nelems)); } static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, const struct nft_set *set, u16 event, u16 flags) { u64 timeout = READ_ONCE(set->timeout); u32 gc_int = READ_ONCE(set->gc_int); u32 portid = ctx->portid; struct nlmsghdr *nlh; struct nlattr *nest; u32 seq = ctx->seq; int i; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family, NFNETLINK_V0, nft_base_seq(ctx->net)); if (!nlh) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_NAME, set->name)) goto nla_put_failure; if (nla_put_be64(skb, NFTA_SET_HANDLE, cpu_to_be64(set->handle), NFTA_SET_PAD)) goto nla_put_failure; if (event == NFT_MSG_DELSET) { nlmsg_end(skb, nlh); return 0; } if (set->flags != 0) if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_SET_KEY_TYPE, htonl(set->ktype))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_SET_KEY_LEN, htonl(set->klen))) goto nla_put_failure; if (set->flags & NFT_SET_MAP) { if (nla_put_be32(skb, NFTA_SET_DATA_TYPE, htonl(set->dtype))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen))) goto nla_put_failure; } if (set->flags & NFT_SET_OBJECT && nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype))) goto nla_put_failure; if (timeout && nla_put_be64(skb, NFTA_SET_TIMEOUT, nf_jiffies64_to_msecs(timeout), NFTA_SET_PAD)) goto nla_put_failure; if (gc_int && nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int))) goto nla_put_failure; if (set->policy != NFT_SET_POL_PERFORMANCE) { if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy))) goto nla_put_failure; } if (set->udata && nla_put(skb, NFTA_SET_USERDATA, set->udlen, set->udata)) goto nla_put_failure; nest = nla_nest_start_noflag(skb, NFTA_SET_DESC); if (!nest) goto nla_put_failure; if (set->size && nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(nft_set_userspace_size(set->ops, set->size)))) goto nla_put_failure; if (set->field_count > 1 && nf_tables_fill_set_concat(skb, set)) goto nla_put_failure; nla_nest_end(skb, nest); if (nf_tables_fill_set_info(skb, set)) goto nla_put_failure; if 
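/* a single bound expression is dumped as NFTA_SET_EXPR, multiple expressions as an NFTA_SET_EXPRESSIONS list */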
(set->num_exprs == 1) { nest = nla_nest_start_noflag(skb, NFTA_SET_EXPR); if (nf_tables_fill_expr_info(skb, set->exprs[0], false) < 0) goto nla_put_failure; nla_nest_end(skb, nest); } else if (set->num_exprs > 1) { nest = nla_nest_start_noflag(skb, NFTA_SET_EXPRESSIONS); if (nest == NULL) goto nla_put_failure; for (i = 0; i < set->num_exprs; i++) { if (nft_expr_dump(skb, NFTA_LIST_ELEM, set->exprs[i], false) < 0) goto nla_put_failure; } nla_nest_end(skb, nest); } nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_trim(skb, nlh); return -1; } static void nf_tables_set_notify(const struct nft_ctx *ctx, const struct nft_set *set, int event, gfp_t gfp_flags) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); u32 portid = ctx->portid; struct sk_buff *skb; u16 flags = 0; int err; if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return; skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags); if (skb == NULL) goto err; if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = nf_tables_fill_set(skb, ctx, set, event, flags); if (err < 0) { kfree_skb(skb); goto err; } nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) { const struct nft_set *set; unsigned int idx, s_idx = cb->args[0]; struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; struct net *net = sock_net(skb->sk); struct nft_ctx *ctx = cb->data, ctx_set; struct nftables_pernet *nft_net; if (cb->args[1]) return skb->len; rcu_read_lock(); nft_net = nft_pernet(net); cb->seq = READ_ONCE(nft_net->base_seq); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (ctx->family != NFPROTO_UNSPEC && ctx->family != table->family) continue; if (ctx->table && ctx->table != table) continue; if (cur_table) { if (cur_table != table) continue; cur_table = NULL; } idx = 0; list_for_each_entry_rcu(set, &table->sets, list) { if (idx < s_idx) goto cont; if (!nft_is_active(net, set)) goto cont; ctx_set = *ctx; ctx_set.table = table; ctx_set.family = table->family; if (nf_tables_fill_set(skb, &ctx_set, set, NFT_MSG_NEWSET, NLM_F_MULTI) < 0) { cb->args[0] = idx; cb->args[2] = (unsigned long) table; goto done; } nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } if (s_idx) s_idx = 0; } cb->args[1] = 1; done: rcu_read_unlock(); return skb->len; } static int nf_tables_dump_sets_start(struct netlink_callback *cb) { struct nft_ctx *ctx_dump = NULL; ctx_dump = kmemdup(cb->data, sizeof(*ctx_dump), GFP_ATOMIC); if (ctx_dump == NULL) return -ENOMEM; cb->data = ctx_dump; return 0; } static int nf_tables_dump_sets_done(struct netlink_callback *cb) { kfree(cb->data); return 0; } /* called with rcu_read_lock held */ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; struct nft_table *table = NULL; struct net *net = info->net; const struct nft_set *set; struct sk_buff *skb2; struct nft_ctx ctx; int err; if (nla[NFTA_SET_TABLE]) { table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]); return PTR_ERR(table); } } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct 
netlink_dump_control c = { .start = nf_tables_dump_sets_start, .dump = nf_tables_dump_sets, .done = nf_tables_dump_sets_done, .data = &ctx, .module = THIS_MODULE, }; return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } /* Only accept unspec with dump */ if (info->nfmsg->nfgen_family == NFPROTO_UNSPEC) return -EAFNOSUPPORT; if (!nla[NFTA_SET_TABLE]) return -EINVAL; set = nft_set_lookup(net, table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); return PTR_ERR(set); } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (skb2 == NULL) return -ENOMEM; err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0); if (err < 0) goto err_fill_set_info; return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); err_fill_set_info: kfree_skb(skb2); return err; } static int nft_set_desc_concat_parse(const struct nlattr *attr, struct nft_set_desc *desc) { struct nlattr *tb[NFTA_SET_FIELD_MAX + 1]; u32 len; int err; if (desc->field_count >= ARRAY_SIZE(desc->field_len)) return -E2BIG; err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr, nft_concat_policy, NULL); if (err < 0) return err; if (!tb[NFTA_SET_FIELD_LEN]) return -EINVAL; len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN])); if (!len || len > U8_MAX) return -EINVAL; desc->field_len[desc->field_count++] = len; return 0; } static int nft_set_desc_concat(struct nft_set_desc *desc, const struct nlattr *nla) { u32 len = 0, num_regs; struct nlattr *attr; int rem, err, i; nla_for_each_nested(attr, nla, rem) { if (nla_type(attr) != NFTA_LIST_ELEM) return -EINVAL; err = nft_set_desc_concat_parse(attr, desc); if (err < 0) return err; } for (i = 0; i < desc->field_count; i++) len += round_up(desc->field_len[i], sizeof(u32)); if (len != desc->klen) return -EINVAL; num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32)); if (num_regs > NFT_REG32_COUNT) return -E2BIG; return 0; } static int nf_tables_set_desc_parse(struct nft_set_desc *desc, const struct nlattr *nla) { struct nlattr *da[NFTA_SET_DESC_MAX + 1]; int err; err = nla_parse_nested_deprecated(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy, NULL); if (err < 0) return err; if (da[NFTA_SET_DESC_SIZE] != NULL) desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE])); if (da[NFTA_SET_DESC_CONCAT]) err = nft_set_desc_concat(desc, da[NFTA_SET_DESC_CONCAT]); return err; } static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr * const *nla, struct nft_expr **exprs, int *num_exprs, u32 flags) { struct nft_expr *expr; int err, i; if (nla[NFTA_SET_EXPR]) { expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]); if (IS_ERR(expr)) { err = PTR_ERR(expr); goto err_set_expr_alloc; } exprs[0] = expr; (*num_exprs)++; } else if (nla[NFTA_SET_EXPRESSIONS]) { struct nlattr *tmp; int left; if (!(flags & NFT_SET_EXPR)) { err = -EINVAL; goto err_set_expr_alloc; } i = 0; nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { if (i == NFT_SET_EXPR_MAX) { err = -E2BIG; goto err_set_expr_alloc; } if (nla_type(tmp) != NFTA_LIST_ELEM) { err = -EINVAL; goto err_set_expr_alloc; } expr = nft_set_elem_expr_alloc(ctx, set, tmp); if (IS_ERR(expr)) { err = PTR_ERR(expr); goto err_set_expr_alloc; } exprs[i++] = expr; (*num_exprs)++; } } return 0; err_set_expr_alloc: for (i = 0; i < *num_exprs; i++) nft_expr_destroy(ctx, exprs[i]); return err; } static bool nft_set_is_same(const struct nft_set *set, const struct nft_set_desc *desc, struct nft_expr *exprs[], u32 num_exprs, u32 flags) { int i; if (set->ktype != desc->ktype || 
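/* an existing set only satisfies the new request if key/data types and lengths, flags, field layout and expressions all match; any difference makes nf_tables_newset() return -EEXIST */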
set->dtype != desc->dtype || set->flags != flags || set->klen != desc->klen || set->dlen != desc->dlen || set->field_count != desc->field_count || set->num_exprs != num_exprs) return false; for (i = 0; i < desc->field_count; i++) { if (set->field_len[i] != desc->field_len[i]) return false; } for (i = 0; i < num_exprs; i++) { if (set->exprs[i]->ops != exprs[i]->ops) return false; } return true; } static u32 nft_set_kernel_size(const struct nft_set_ops *ops, const struct nft_set_desc *desc) { if (ops->ksize) return ops->ksize(desc->size); return desc->size; } static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; const struct nft_set_ops *ops; struct net *net = info->net; struct nft_set_desc desc; struct nft_table *table; unsigned char *udata; struct nft_set *set; struct nft_ctx ctx; size_t alloc_size; int num_exprs = 0; char *name; int err, i; u16 udlen; u32 flags; u64 size; if (nla[NFTA_SET_TABLE] == NULL || nla[NFTA_SET_NAME] == NULL || nla[NFTA_SET_KEY_LEN] == NULL || nla[NFTA_SET_ID] == NULL) return -EINVAL; memset(&desc, 0, sizeof(desc)); desc.ktype = NFT_DATA_VALUE; if (nla[NFTA_SET_KEY_TYPE] != NULL) { desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) return -EINVAL; } desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); if (desc.klen == 0 || desc.klen > NFT_DATA_VALUE_MAXLEN) return -EINVAL; flags = 0; if (nla[NFTA_SET_FLAGS] != NULL) { flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | NFT_SET_INTERVAL | NFT_SET_TIMEOUT | NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT | NFT_SET_CONCAT | NFT_SET_EXPR)) return -EOPNOTSUPP; /* Only one of these operations is supported */ if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) == (NFT_SET_MAP | NFT_SET_OBJECT)) return -EOPNOTSUPP; if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == (NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) return -EOPNOTSUPP; if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) == (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) return -EOPNOTSUPP; } desc.dtype = 0; if (nla[NFTA_SET_DATA_TYPE] != NULL) { if (!(flags & NFT_SET_MAP)) return -EINVAL; desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && desc.dtype != NFT_DATA_VERDICT) return -EINVAL; if (desc.dtype != NFT_DATA_VERDICT) { if (nla[NFTA_SET_DATA_LEN] == NULL) return -EINVAL; desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); if (desc.dlen == 0 || desc.dlen > NFT_DATA_VALUE_MAXLEN) return -EINVAL; } else desc.dlen = sizeof(struct nft_verdict); } else if (flags & NFT_SET_MAP) return -EINVAL; if (nla[NFTA_SET_OBJ_TYPE] != NULL) { if (!(flags & NFT_SET_OBJECT)) return -EINVAL; desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); if (desc.objtype == NFT_OBJECT_UNSPEC || desc.objtype > NFT_OBJECT_MAX) return -EOPNOTSUPP; } else if (flags & NFT_SET_OBJECT) return -EINVAL; else desc.objtype = NFT_OBJECT_UNSPEC; desc.timeout = 0; if (nla[NFTA_SET_TIMEOUT] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; if (flags & NFT_SET_ANONYMOUS) return -EOPNOTSUPP; err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; } desc.gc_int = 0; if 
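/* like the timeout itself, a garbage collection interval requires NFT_SET_TIMEOUT and is rejected for anonymous sets */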
(nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; if (flags & NFT_SET_ANONYMOUS) return -EOPNOTSUPP; desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } desc.policy = NFT_SET_POL_PERFORMANCE; if (nla[NFTA_SET_POLICY] != NULL) { desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); switch (desc.policy) { case NFT_SET_POL_PERFORMANCE: case NFT_SET_POL_MEMORY: break; default: return -EOPNOTSUPP; } } if (nla[NFTA_SET_DESC] != NULL) { err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]); if (err < 0) return err; if (desc.field_count > 1) { if (!(flags & NFT_SET_CONCAT)) return -EINVAL; } else if (flags & NFT_SET_CONCAT) { return -EINVAL; } } else if (flags & NFT_SET_CONCAT) { return -EINVAL; } if (nla[NFTA_SET_EXPR] || nla[NFTA_SET_EXPRESSIONS]) desc.expr = true; table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask, NETLINK_CB(skb).portid); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]); return PTR_ERR(table); } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); set = nft_set_lookup(net, table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) { if (PTR_ERR(set) != -ENOENT) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); return PTR_ERR(set); } } else { struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {}; if (info->nlh->nlmsg_flags & NLM_F_EXCL) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); return -EEXIST; } if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; if (nft_set_is_anonymous(set)) return -EOPNOTSUPP; err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags); if (err < 0) return err; if (desc.size) desc.size = nft_set_kernel_size(set->ops, &desc); err = 0; if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); err = -EEXIST; } for (i = 0; i < num_exprs; i++) nft_expr_destroy(&ctx, exprs[i]); if (err < 0) return err; return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc); } if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; ops = nft_select_set_ops(&ctx, flags, &desc); if (IS_ERR(ops)) return PTR_ERR(ops); if (desc.size) desc.size = nft_set_kernel_size(ops, &desc); udlen = 0; if (nla[NFTA_SET_USERDATA]) udlen = nla_len(nla[NFTA_SET_USERDATA]); size = 0; if (ops->privsize != NULL) size = ops->privsize(nla, &desc); alloc_size = sizeof(*set) + size + udlen; if (alloc_size < size || alloc_size > INT_MAX) return -ENOMEM; if (!nft_use_inc(&table->use)) return -EMFILE; set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT); if (!set) { err = -ENOMEM; goto err_alloc; } name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT); if (!name) { err = -ENOMEM; goto err_set_name; } err = nf_tables_set_alloc_name(&ctx, set, name); kfree(name); if (err < 0) goto err_set_name; udata = NULL; if (udlen) { udata = set->data + size; nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen); } INIT_LIST_HEAD(&set->bindings); INIT_LIST_HEAD(&set->catchall_list); refcount_set(&set->refs, 1); set->table = table; write_pnet(&set->net, net); set->ops = ops; set->ktype = desc.ktype; set->klen = desc.klen; set->dtype = desc.dtype; set->objtype = desc.objtype; set->dlen = desc.dlen; set->flags = flags; set->size = desc.size; set->policy = desc.policy; set->udlen = udlen; set->udata = udata; set->timeout = desc.timeout; set->gc_int = desc.gc_int; set->field_count = desc.field_count; for (i = 0; i < desc.field_count; i++) set->field_len[i] = desc.field_len[i]; err = ops->init(set, &desc, nla); if (err < 0) goto err_set_init; err = nft_set_expr_alloc(&ctx, 
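/* attach the per-element expressions requested via NFTA_SET_EXPR or NFTA_SET_EXPRESSIONS; on failure the freshly initialised set is torn down via ops->destroy() in the error path */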
set, nla, set->exprs, &num_exprs, flags); if (err < 0) goto err_set_destroy; set->num_exprs = num_exprs; set->handle = nf_tables_alloc_handle(table); INIT_LIST_HEAD(&set->pending_update); err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) goto err_set_expr_alloc; list_add_tail_rcu(&set->list, &table->sets); return 0; err_set_expr_alloc: for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(&ctx, set->exprs[i]); err_set_destroy: ops->destroy(&ctx, set); err_set_init: kfree(set->name); err_set_name: kvfree(set); err_alloc: nft_use_dec_restore(&table->use); return err; } static void nft_set_catchall_destroy(const struct nft_ctx *ctx, struct nft_set *set) { struct nft_set_elem_catchall *next, *catchall; list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { list_del_rcu(&catchall->list); nf_tables_set_elem_destroy(ctx, set, catchall->elem); kfree_rcu(catchall, rcu); } } static void nft_set_put(struct nft_set *set) { if (refcount_dec_and_test(&set->refs)) { kfree(set->name); kvfree(set); } } static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) { int i; if (WARN_ON(set->use > 0)) return; for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(ctx, set->exprs[i]); set->ops->destroy(ctx, set); nft_set_catchall_destroy(ctx, set); nft_set_put(set); } static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; if (info->nfmsg->nfgen_family == NFPROTO_UNSPEC) return -EAFNOSUPPORT; table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask, NETLINK_CB(skb).portid); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]); return PTR_ERR(table); } if (nla[NFTA_SET_HANDLE]) { attr = nla[NFTA_SET_HANDLE]; set = nft_set_lookup_byhandle(table, attr, genmask); } else { attr = nla[NFTA_SET_NAME]; set = nft_set_lookup(net, table, attr, genmask); } if (IS_ERR(set)) { if (PTR_ERR(set) == -ENOENT && NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYSET) return 0; NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(set); } if (set->use || (info->nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) { NL_SET_BAD_ATTR(extack, attr); return -EBUSY; } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); return nft_delset(&ctx, set); } static int nft_validate_register_store(const struct nft_ctx *ctx, enum nft_registers reg, const struct nft_data *data, enum nft_data_types type, unsigned int len); static int nft_setelem_data_validate(const struct nft_ctx *ctx, struct nft_set *set, struct nft_elem_priv *elem_priv) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); enum nft_registers dreg; dreg = nft_type_to_reg(set->dtype); return nft_validate_register_store(ctx, dreg, nft_set_ext_data(ext), set->dtype == NFT_DATA_VERDICT ? 
NFT_DATA_VERDICT : NFT_DATA_VALUE, set->dlen); } static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (!nft_set_elem_active(ext, iter->genmask)) return 0; return nft_setelem_data_validate(ctx, set, elem_priv); } static int nft_set_catchall_bind_check(const struct nft_ctx *ctx, struct nft_set *set) { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; struct nft_set_ext *ext; int ret = 0; list_for_each_entry_rcu(catchall, &set->catchall_list, list, lockdep_commit_lock_is_held(ctx->net)) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_active(ext, genmask)) continue; ret = nft_setelem_data_validate(ctx, set, catchall->elem); if (ret < 0) break; } return ret; } int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding) { struct nft_set_binding *i; struct nft_set_iter iter; if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; if (binding->flags & NFT_SET_MAP) { /* If the set is already bound to the same chain all * jumps are already validated for that chain. */ list_for_each_entry(i, &set->bindings, list) { if (i->flags & NFT_SET_MAP && i->chain == binding->chain) goto bind; } iter.genmask = nft_genmask_next(ctx->net); iter.type = NFT_ITER_UPDATE; iter.skip = 0; iter.count = 0; iter.err = 0; iter.fn = nf_tables_bind_check_setelem; set->ops->walk(ctx, set, &iter); if (!iter.err) iter.err = nft_set_catchall_bind_check(ctx, set); if (iter.err < 0) return iter.err; } bind: if (!nft_use_inc(&set->use)) return -EMFILE; binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); nft_set_trans_bind(ctx, set); return 0; } EXPORT_SYMBOL_GPL(nf_tables_bind_set); static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, bool event) { list_del_rcu(&binding->list); if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL); } } static void nft_setelem_data_activate(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv); static int nft_mapelem_activate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); /* called from abort path, reverse check to undo changes. 
*/ if (nft_set_elem_active(ext, iter->genmask)) return 0; nft_clear(ctx->net, ext); nft_setelem_data_activate(ctx->net, set, elem_priv); return 0; } static void nft_map_catchall_activate(const struct nft_ctx *ctx, struct nft_set *set) { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; struct nft_set_ext *ext; list_for_each_entry(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_active(ext, genmask)) continue; nft_clear(ctx->net, ext); nft_setelem_data_activate(ctx->net, set, catchall->elem); break; } } static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set) { struct nft_set_iter iter = { .genmask = nft_genmask_next(ctx->net), .type = NFT_ITER_UPDATE, .fn = nft_mapelem_activate, }; set->ops->walk(ctx, set, &iter); WARN_ON_ONCE(iter.err); nft_map_catchall_activate(ctx, set); } void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) { if (nft_set_is_anonymous(set)) { if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) nft_map_activate(ctx, set); nft_clear(ctx->net, set); } nft_use_inc_restore(&set->use); } EXPORT_SYMBOL_GPL(nf_tables_activate_set); void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase) { lockdep_commit_lock_is_held(ctx->net); switch (phase) { case NFT_TRANS_PREPARE_ERROR: nft_set_trans_unbind(ctx, set); if (nft_set_is_anonymous(set)) nft_deactivate_next(ctx->net, set); else list_del_rcu(&binding->list); nft_use_dec(&set->use); break; case NFT_TRANS_PREPARE: if (nft_set_is_anonymous(set)) { if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) nft_map_deactivate(ctx, set); nft_deactivate_next(ctx->net, set); } nft_use_dec(&set->use); return; case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: if (nft_set_is_anonymous(set) && set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) nft_map_deactivate(ctx, set); nft_use_dec(&set->use); fallthrough; default: nf_tables_unbind_set(ctx, set, binding, phase == NFT_TRANS_COMMIT); } } EXPORT_SYMBOL_GPL(nf_tables_deactivate_set); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) nft_set_destroy(ctx, set); } EXPORT_SYMBOL_GPL(nf_tables_destroy_set); const struct nft_set_ext_type nft_set_ext_types[] = { [NFT_SET_EXT_KEY] = { .align = __alignof__(u32), }, [NFT_SET_EXT_DATA] = { .align = __alignof__(u32), }, [NFT_SET_EXT_EXPRESSIONS] = { .align = __alignof__(struct nft_set_elem_expr), }, [NFT_SET_EXT_OBJREF] = { .len = sizeof(struct nft_object *), .align = __alignof__(struct nft_object *), }, [NFT_SET_EXT_FLAGS] = { .len = sizeof(u8), .align = __alignof__(u8), }, [NFT_SET_EXT_TIMEOUT] = { .len = sizeof(struct nft_timeout), .align = __alignof__(struct nft_timeout), }, [NFT_SET_EXT_USERDATA] = { .len = sizeof(struct nft_userdata), .align = __alignof__(struct nft_userdata), }, [NFT_SET_EXT_KEY_END] = { .align = __alignof__(u32), }, }; /* * Set elements */ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 }, [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 }, [NFTA_SET_ELEM_EXPIRATION] = { .type = NLA_U64 }, [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, [NFTA_SET_ELEM_EXPR] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING, .len = NFT_OBJ_MAXNAMELEN - 1 }, 
[NFTA_SET_ELEM_KEY_END] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy), }; static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_SET_ELEM_LIST_ELEMENTS] = NLA_POLICY_NESTED_ARRAY(nft_set_elem_policy), [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 }, }; static int nft_set_elem_expr_dump(struct sk_buff *skb, const struct nft_set *set, const struct nft_set_ext *ext, bool reset) { struct nft_set_elem_expr *elem_expr; u32 size, num_exprs = 0; struct nft_expr *expr; struct nlattr *nest; elem_expr = nft_set_ext_expr(ext); nft_setelem_expr_foreach(expr, elem_expr, size) num_exprs++; if (num_exprs == 1) { expr = nft_setelem_expr_at(elem_expr, 0); if (nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, expr, reset) < 0) return -1; return 0; } else if (num_exprs > 1) { nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_EXPRESSIONS); if (nest == NULL) goto nla_put_failure; nft_setelem_expr_foreach(expr, elem_expr, size) { expr = nft_setelem_expr_at(elem_expr, size); if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr, reset) < 0) goto nla_put_failure; } nla_nest_end(skb, nest); } return 0; nla_put_failure: return -1; } static int nf_tables_fill_setelem(struct sk_buff *skb, const struct nft_set *set, const struct nft_elem_priv *elem_priv, bool reset) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM); if (nest == NULL) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY) && nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext), NFT_DATA_VALUE, set->klen) < 0) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END) && nft_data_dump(skb, NFTA_SET_ELEM_KEY_END, nft_set_ext_key_end(ext), NFT_DATA_VALUE, set->klen) < 0) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext), nft_set_datatype(set), set->dlen) < 0) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS) && nft_set_elem_expr_dump(skb, set, ext, reset)) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && nla_put_string(skb, NFTA_SET_ELEM_OBJREF, (*nft_set_ext_obj(ext))->key.name) < 0) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(*nft_set_ext_flags(ext)))) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) { u64 timeout = READ_ONCE(nft_set_ext_timeout(ext)->timeout); u64 set_timeout = READ_ONCE(set->timeout); __be64 msecs = 0; if (set_timeout != timeout) { msecs = nf_jiffies64_to_msecs(timeout); if (nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, msecs, NFTA_SET_ELEM_PAD)) goto nla_put_failure; } if (timeout > 0) { u64 expires, now = get_jiffies_64(); expires = READ_ONCE(nft_set_ext_timeout(ext)->expiration); if (time_before64(now, expires)) expires -= now; else expires = 0; if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION, nf_jiffies64_to_msecs(expires), NFTA_SET_ELEM_PAD)) goto nla_put_failure; } } if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) { struct nft_userdata *udata; udata = nft_set_ext_userdata(ext); if (nla_put(skb, NFTA_SET_ELEM_USERDATA, udata->len + 1, udata->data)) goto nla_put_failure; } nla_nest_end(skb, nest); return 0; 
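	/* Descriptive note (added): the skb ran out of room, so undo the
	 * partially-built element nest and return -EMSGSIZE; the dump path
	 * treats this as "element did not fit" and retries in a new message.
	 */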
nla_put_failure: nlmsg_trim(skb, b); return -EMSGSIZE; } struct nft_set_dump_args { const struct netlink_callback *cb; struct nft_set_iter iter; struct sk_buff *skb; bool reset; }; static int nf_tables_dump_setelem(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); struct nft_set_dump_args *args; if (!nft_set_elem_active(ext, iter->genmask)) return 0; if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext)) return 0; args = container_of(iter, struct nft_set_dump_args, iter); return nf_tables_fill_setelem(args->skb, set, elem_priv, args->reset); } static void audit_log_nft_set_reset(const struct nft_table *table, unsigned int base_seq, unsigned int nentries) { char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq); audit_log_nfcfg(buf, table->family, nentries, AUDIT_NFT_OP_SETELEM_RESET, GFP_ATOMIC); kfree(buf); } struct nft_set_dump_ctx { const struct nft_set *set; struct nft_ctx ctx; bool reset; }; static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb, const struct nft_set *set, bool reset, unsigned int base_seq) { struct nft_set_elem_catchall *catchall; u8 genmask = nft_genmask_cur(net); struct nft_set_ext *ext; int ret = 0; list_for_each_entry_rcu(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_active(ext, genmask) || nft_set_elem_expired(ext)) continue; ret = nf_tables_fill_setelem(skb, set, catchall->elem, reset); if (reset && !ret) audit_log_nft_set_reset(set->table, base_seq, 1); break; } return ret; } static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { struct nft_set_dump_ctx *dump_ctx = cb->data; struct net *net = sock_net(skb->sk); struct nftables_pernet *nft_net; struct nft_table *table; struct nft_set *set; struct nft_set_dump_args args; bool set_found = false; struct nlmsghdr *nlh; struct nlattr *nest; u32 portid, seq; int event; rcu_read_lock(); nft_net = nft_pernet(net); cb->seq = READ_ONCE(nft_net->base_seq); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (dump_ctx->ctx.family != NFPROTO_UNSPEC && dump_ctx->ctx.family != table->family) continue; if (table != dump_ctx->ctx.table) continue; list_for_each_entry_rcu(set, &table->sets, list) { if (set == dump_ctx->set) { set_found = true; break; } } break; } if (!set_found) { rcu_read_unlock(); return -ENOENT; } event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM); portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI, table->family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name)) goto nla_put_failure; nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_LIST_ELEMENTS); if (nest == NULL) goto nla_put_failure; args.cb = cb; args.skb = skb; args.reset = dump_ctx->reset; args.iter.genmask = nft_genmask_cur(net); args.iter.type = NFT_ITER_READ; args.iter.skip = cb->args[0]; args.iter.count = 0; args.iter.err = 0; args.iter.fn = nf_tables_dump_setelem; set->ops->walk(&dump_ctx->ctx, set, &args.iter); if (!args.iter.err && args.iter.count == cb->args[0]) args.iter.err = nft_set_catchall_dump(net, skb, set, dump_ctx->reset, cb->seq); nla_nest_end(skb, nest); nlmsg_end(skb, nlh); rcu_read_unlock(); if (args.iter.err && args.iter.err != -EMSGSIZE) return 
args.iter.err; if (args.iter.count == cb->args[0]) return 0; cb->args[0] = args.iter.count; return skb->len; nla_put_failure: rcu_read_unlock(); return -ENOSPC; } static int nf_tables_dumpreset_set(struct sk_buff *skb, struct netlink_callback *cb) { struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); struct nft_set_dump_ctx *dump_ctx = cb->data; int ret, skip = cb->args[0]; mutex_lock(&nft_net->commit_mutex); ret = nf_tables_dump_set(skb, cb); if (cb->args[0] > skip) audit_log_nft_set_reset(dump_ctx->ctx.table, cb->seq, cb->args[0] - skip); mutex_unlock(&nft_net->commit_mutex); return ret; } static int nf_tables_dump_set_start(struct netlink_callback *cb) { struct nft_set_dump_ctx *dump_ctx = cb->data; cb->data = kmemdup(dump_ctx, sizeof(*dump_ctx), GFP_ATOMIC); return cb->data ? 0 : -ENOMEM; } static int nf_tables_dump_set_done(struct netlink_callback *cb) { kfree(cb->data); return 0; } static int nf_tables_fill_setelem_info(struct sk_buff *skb, const struct nft_ctx *ctx, u32 seq, u32 portid, int event, u16 flags, const struct nft_set *set, const struct nft_elem_priv *elem_priv, bool reset) { struct nlmsghdr *nlh; struct nlattr *nest; int err; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family, NFNETLINK_V0, nft_base_seq(ctx->net)); if (!nlh) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_NAME, set->name)) goto nla_put_failure; nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_LIST_ELEMENTS); if (nest == NULL) goto nla_put_failure; err = nf_tables_fill_setelem(skb, set, elem_priv, reset); if (err < 0) goto nla_put_failure; nla_nest_end(skb, nest); nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_trim(skb, nlh); return -1; } static int nft_setelem_parse_flags(const struct nft_set *set, const struct nlattr *attr, u32 *flags) { if (attr == NULL) return 0; *flags = ntohl(nla_get_be32(attr)); if (*flags & ~(NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL)) return -EOPNOTSUPP; if (!(set->flags & NFT_SET_INTERVAL) && *flags & NFT_SET_ELEM_INTERVAL_END) return -EINVAL; if ((*flags & (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL)) == (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL)) return -EINVAL; return 0; } static int nft_setelem_parse_key(struct nft_ctx *ctx, const struct nft_set *set, struct nft_data *key, struct nlattr *attr) { struct nft_data_desc desc = { .type = NFT_DATA_VALUE, .size = NFT_DATA_VALUE_MAXLEN, .len = set->klen, }; return nft_data_init(ctx, key, &desc, attr); } static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, struct nft_data_desc *desc, struct nft_data *data, struct nlattr *attr) { u32 dtype; if (set->dtype == NFT_DATA_VERDICT) dtype = NFT_DATA_VERDICT; else dtype = NFT_DATA_VALUE; desc->type = dtype; desc->size = NFT_DATA_VALUE_MAXLEN; desc->len = set->dlen; desc->flags = NFT_DATA_DESC_SETELEM; return nft_data_init(ctx, data, desc, attr); } static void *nft_setelem_catchall_get(const struct net *net, const struct nft_set *set) { struct nft_set_elem_catchall *catchall; u8 genmask = nft_genmask_cur(net); struct nft_set_ext *ext; void *priv = NULL; list_for_each_entry_rcu(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_active(ext, genmask) || nft_set_elem_expired(ext)) continue; priv = catchall->elem; break; } return priv; } static int nft_setelem_get(struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_elem 
*elem, u32 flags) { void *priv; if (!(flags & NFT_SET_ELEM_CATCHALL)) { priv = set->ops->get(ctx->net, set, elem, flags); if (IS_ERR(priv)) return PTR_ERR(priv); } else { priv = nft_setelem_catchall_get(ctx->net, set); if (!priv) return -ENOENT; } elem->priv = priv; return 0; } static int nft_get_set_elem(struct nft_ctx *ctx, const struct nft_set *set, const struct nlattr *attr, bool reset) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_set_elem elem; struct sk_buff *skb; uint32_t flags = 0; int err; err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr, nft_set_elem_policy, NULL); if (err < 0) return err; err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags); if (err < 0) return err; if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL)) return -EINVAL; if (nla[NFTA_SET_ELEM_KEY]) { err = nft_setelem_parse_key(ctx, set, &elem.key.val, nla[NFTA_SET_ELEM_KEY]); if (err < 0) return err; } if (nla[NFTA_SET_ELEM_KEY_END]) { err = nft_setelem_parse_key(ctx, set, &elem.key_end.val, nla[NFTA_SET_ELEM_KEY_END]); if (err < 0) return err; } err = nft_setelem_get(ctx, set, &elem, flags); if (err < 0) return err; err = -ENOMEM; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (skb == NULL) return err; err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid, NFT_MSG_NEWSETELEM, 0, set, elem.priv, reset); if (err < 0) goto err_fill_setelem; return nfnetlink_unicast(skb, ctx->net, ctx->portid); err_fill_setelem: kfree_skb(skb); return err; } static int nft_set_dump_ctx_init(struct nft_set_dump_ctx *dump_ctx, const struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[], bool reset) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; struct nft_table *table; struct nft_set *set; table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); return PTR_ERR(table); } set = nft_set_lookup(net, table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]); return PTR_ERR(set); } nft_ctx_init(&dump_ctx->ctx, net, skb, info->nlh, family, table, NULL, nla); dump_ctx->set = set; dump_ctx->reset = reset; return 0; } /* called with rcu_read_lock held */ static int nf_tables_getsetelem(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; struct nft_set_dump_ctx dump_ctx; struct nlattr *attr; int rem, err = 0; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = nf_tables_dump_set_start, .dump = nf_tables_dump_set, .done = nf_tables_dump_set_done, .module = THIS_MODULE, }; err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false); if (err) return err; c.data = &dump_ctx; return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) return -EINVAL; err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false); if (err) return err; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, false); if (err < 0) { NL_SET_BAD_ATTR(extack, attr); break; } } return err; } static int nf_tables_getsetelem_reset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct nftables_pernet *nft_net = nft_pernet(info->net); struct 
netlink_ext_ack *extack = info->extack; struct nft_set_dump_ctx dump_ctx; int rem, err = 0, nelems = 0; struct nlattr *attr; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = nf_tables_dump_set_start, .dump = nf_tables_dumpreset_set, .done = nf_tables_dump_set_done, .module = THIS_MODULE, }; err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true); if (err) return err; c.data = &dump_ctx; return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) return -EINVAL; if (!try_module_get(THIS_MODULE)) return -EINVAL; rcu_read_unlock(); mutex_lock(&nft_net->commit_mutex); rcu_read_lock(); err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true); if (err) goto out_unlock; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, true); if (err < 0) { NL_SET_BAD_ATTR(extack, attr); break; } nelems++; } audit_log_nft_set_reset(dump_ctx.ctx.table, nft_net->base_seq, nelems); out_unlock: rcu_read_unlock(); mutex_unlock(&nft_net->commit_mutex); rcu_read_lock(); module_put(THIS_MODULE); return err; } static void nf_tables_setelem_notify(const struct nft_ctx *ctx, const struct nft_set *set, const struct nft_elem_priv *elem_priv, int event) { struct nftables_pernet *nft_net; struct net *net = ctx->net; u32 portid = ctx->portid; struct sk_buff *skb; u16 flags = 0; int err; if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) return; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags, set, elem_priv, false); if (err < 0) { kfree_skb(skb); goto err; } nft_net = nft_pernet(net); nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } static struct nft_trans *nft_trans_elem_alloc(const struct nft_ctx *ctx, int msg_type, struct nft_set *set) { struct nft_trans_elem *te; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, struct_size(te, elems, 1)); if (trans == NULL) return NULL; te = nft_trans_container_elem(trans); te->nelems = 1; te->set = set; return trans; } struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, const struct nft_set *set, const struct nlattr *attr) { struct nft_expr *expr; int err; expr = nft_expr_init(ctx, attr); if (IS_ERR(expr)) return expr; err = -EOPNOTSUPP; if (expr->ops->type->flags & NFT_EXPR_GC) { if (set->flags & NFT_SET_TIMEOUT) goto err_set_elem_expr; if (!set->ops->gc_init) goto err_set_elem_expr; set->ops->gc_init(set); } return expr; err_set_elem_expr: nft_expr_destroy(ctx, expr); return ERR_PTR(err); } static int nft_set_ext_check(const struct nft_set_ext_tmpl *tmpl, u8 id, u32 len) { len += nft_set_ext_types[id].len; if (len > tmpl->ext_len[id] || len > U8_MAX) return -1; return 0; } static int nft_set_ext_memcpy(const struct nft_set_ext_tmpl *tmpl, u8 id, void *to, const void *from, u32 len) { if (nft_set_ext_check(tmpl, id, len) < 0) return -1; memcpy(to, from, len); return 0; } struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *key_end, const u32 *data, u64 timeout, u64 expiration, gfp_t gfp) { struct nft_set_ext *ext; void *elem; elem = kzalloc(set->ops->elemsize + tmpl->len, gfp); if (elem == NULL) return ERR_PTR(-ENOMEM); ext = 
nft_set_elem_ext(set, elem); nft_set_ext_init(ext, tmpl); if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY) && nft_set_ext_memcpy(tmpl, NFT_SET_EXT_KEY, nft_set_ext_key(ext), key, set->klen) < 0) goto err_ext_check; if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END) && nft_set_ext_memcpy(tmpl, NFT_SET_EXT_KEY_END, nft_set_ext_key_end(ext), key_end, set->klen) < 0) goto err_ext_check; if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && nft_set_ext_memcpy(tmpl, NFT_SET_EXT_DATA, nft_set_ext_data(ext), data, set->dlen) < 0) goto err_ext_check; if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) { nft_set_ext_timeout(ext)->timeout = timeout; if (expiration == 0) expiration = timeout; nft_set_ext_timeout(ext)->expiration = get_jiffies_64() + expiration; } return elem; err_ext_check: kfree(elem); return ERR_PTR(-EINVAL); } static void __nft_set_elem_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr) { if (expr->ops->destroy_clone) { expr->ops->destroy_clone(ctx, expr); module_put(expr->ops->type->owner); } else { nf_tables_expr_destroy(ctx, expr); } } static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, struct nft_set_elem_expr *elem_expr) { struct nft_expr *expr; u32 size; nft_setelem_expr_foreach(expr, elem_expr, size) __nft_set_elem_expr_destroy(ctx, expr); } /* Drop references and destroy. Called from gc, dynset and abort path. */ static void __nft_set_elem_destroy(const struct nft_ctx *ctx, const struct nft_set *set, const struct nft_elem_priv *elem_priv, bool destroy_expr) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_release(nft_set_ext_data(ext), set->dtype); if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext)); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) nft_use_dec(&(*nft_set_ext_obj(ext))->use); kfree(elem_priv); } /* Drop references and destroy. Called from gc and dynset. */ void nft_set_elem_destroy(const struct nft_set *set, const struct nft_elem_priv *elem_priv, bool destroy_expr) { struct nft_ctx ctx = { .net = read_pnet(&set->net), .family = set->table->family, }; __nft_set_elem_destroy(&ctx, set, elem_priv, destroy_expr); } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); /* Drop references and destroy. Called from abort path. */ static void nft_trans_set_elem_destroy(const struct nft_ctx *ctx, struct nft_trans_elem *te) { int i; for (i = 0; i < te->nelems; i++) { /* skip update request, see nft_trans_elems_new_abort() */ if (!te->elems[i].priv) continue; __nft_set_elem_destroy(ctx, te->set, te->elems[i].priv, true); } } /* Destroy element. References have been already dropped in the preparation * path via nft_setelem_data_deactivate(). 
*/ void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, const struct nft_set *set, const struct nft_elem_priv *elem_priv) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext)); kfree(elem_priv); } static void nft_trans_elems_destroy(const struct nft_ctx *ctx, const struct nft_trans_elem *te) { int i; for (i = 0; i < te->nelems; i++) nf_tables_set_elem_destroy(ctx, te->set, te->elems[i].priv); } int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr_array[]) { struct nft_expr *expr; int err, i, k; for (i = 0; i < set->num_exprs; i++) { expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL_ACCOUNT); if (!expr) goto err_expr; err = nft_expr_clone(expr, set->exprs[i], GFP_KERNEL_ACCOUNT); if (err < 0) { kfree(expr); goto err_expr; } expr_array[i] = expr; } return 0; err_expr: for (k = i - 1; k >= 0; k--) nft_expr_destroy(ctx, expr_array[k]); return -ENOMEM; } static int nft_set_elem_expr_setup(struct nft_ctx *ctx, const struct nft_set_ext_tmpl *tmpl, const struct nft_set_ext *ext, struct nft_expr *expr_array[], u32 num_exprs) { struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext); u32 len = sizeof(struct nft_set_elem_expr); struct nft_expr *expr; int i, err; if (num_exprs == 0) return 0; for (i = 0; i < num_exprs; i++) len += expr_array[i]->ops->size; if (nft_set_ext_check(tmpl, NFT_SET_EXT_EXPRESSIONS, len) < 0) return -EINVAL; for (i = 0; i < num_exprs; i++) { expr = nft_setelem_expr_at(elem_expr, elem_expr->size); err = nft_expr_clone(expr, expr_array[i], GFP_KERNEL_ACCOUNT); if (err < 0) goto err_elem_expr_setup; elem_expr->size += expr_array[i]->ops->size; nft_expr_destroy(ctx, expr_array[i]); expr_array[i] = NULL; } return 0; err_elem_expr_setup: for (; i < num_exprs; i++) { nft_expr_destroy(ctx, expr_array[i]); expr_array[i] = NULL; } return -ENOMEM; } struct nft_set_ext *nft_set_catchall_lookup(const struct net *net, const struct nft_set *set) { struct nft_set_elem_catchall *catchall; u8 genmask = nft_genmask_cur(net); struct nft_set_ext *ext; list_for_each_entry_rcu(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (nft_set_elem_active(ext, genmask) && !nft_set_elem_expired(ext) && !nft_set_elem_is_dead(ext)) return ext; } return NULL; } EXPORT_SYMBOL_GPL(nft_set_catchall_lookup); static int nft_setelem_catchall_insert(const struct net *net, struct nft_set *set, const struct nft_set_elem *elem, struct nft_elem_priv **priv) { struct nft_set_elem_catchall *catchall; u8 genmask = nft_genmask_next(net); struct nft_set_ext *ext; list_for_each_entry(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (nft_set_elem_active(ext, genmask)) { *priv = catchall->elem; return -EEXIST; } } catchall = kmalloc(sizeof(*catchall), GFP_KERNEL_ACCOUNT); if (!catchall) return -ENOMEM; catchall->elem = elem->priv; list_add_tail_rcu(&catchall->list, &set->catchall_list); return 0; } static int nft_setelem_insert(const struct net *net, struct nft_set *set, const struct nft_set_elem *elem, struct nft_elem_priv **elem_priv, unsigned int flags) { int ret; if (flags & NFT_SET_ELEM_CATCHALL) ret = nft_setelem_catchall_insert(net, set, elem, elem_priv); else ret = set->ops->insert(net, set, elem, elem_priv); return ret; } static bool nft_setelem_is_catchall(const struct nft_set *set, const struct nft_elem_priv *elem_priv) { struct nft_set_ext *ext = nft_set_elem_ext(set, 
elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && *nft_set_ext_flags(ext) & NFT_SET_ELEM_CATCHALL) return true; return false; } static void nft_setelem_activate(struct net *net, struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_setelem_is_catchall(set, elem_priv)) { nft_clear(net, ext); } else { set->ops->activate(net, set, elem_priv); } } static void nft_trans_elem_update(const struct nft_set *set, const struct nft_trans_one_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); const struct nft_elem_update *update = elem->update; if (update->flags & NFT_TRANS_UPD_TIMEOUT) WRITE_ONCE(nft_set_ext_timeout(ext)->timeout, update->timeout); if (update->flags & NFT_TRANS_UPD_EXPIRATION) WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + update->expiration); } static void nft_trans_elems_add(const struct nft_ctx *ctx, struct nft_trans_elem *te) { int i; for (i = 0; i < te->nelems; i++) { struct nft_trans_one_elem *elem = &te->elems[i]; if (elem->update) nft_trans_elem_update(te->set, elem); else nft_setelem_activate(ctx->net, te->set, elem->priv); nf_tables_setelem_notify(ctx, te->set, elem->priv, NFT_MSG_NEWSETELEM); kfree(elem->update); } } static int nft_setelem_catchall_deactivate(const struct net *net, struct nft_set *set, struct nft_set_elem *elem) { struct nft_set_elem_catchall *catchall; struct nft_set_ext *ext; list_for_each_entry(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_is_active_next(net, ext)) continue; kfree(elem->priv); elem->priv = catchall->elem; nft_set_elem_change_active(net, set, ext); return 0; } return -ENOENT; } static int __nft_setelem_deactivate(const struct net *net, struct nft_set *set, struct nft_set_elem *elem) { void *priv; priv = set->ops->deactivate(net, set, elem); if (!priv) return -ENOENT; kfree(elem->priv); elem->priv = priv; set->ndeact++; return 0; } static int nft_setelem_deactivate(const struct net *net, struct nft_set *set, struct nft_set_elem *elem, u32 flags) { int ret; if (flags & NFT_SET_ELEM_CATCHALL) ret = nft_setelem_catchall_deactivate(net, set, elem); else ret = __nft_setelem_deactivate(net, set, elem); return ret; } static void nft_setelem_catchall_destroy(struct nft_set_elem_catchall *catchall) { list_del_rcu(&catchall->list); kfree_rcu(catchall, rcu); } static void nft_setelem_catchall_remove(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_set_elem_catchall *catchall, *next; list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { if (catchall->elem == elem_priv) { nft_setelem_catchall_destroy(catchall); break; } } } static void nft_setelem_remove(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { if (nft_setelem_is_catchall(set, elem_priv)) nft_setelem_catchall_remove(net, set, elem_priv); else set->ops->remove(net, set, elem_priv); } static void nft_trans_elems_remove(const struct nft_ctx *ctx, const struct nft_trans_elem *te) { int i; for (i = 0; i < te->nelems; i++) { WARN_ON_ONCE(te->elems[i].update); nf_tables_setelem_notify(ctx, te->set, te->elems[i].priv, te->nft_trans.msg_type); nft_setelem_remove(ctx->net, te->set, te->elems[i].priv); if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) { atomic_dec(&te->set->nelems); te->set->ndeact--; } } } static bool nft_setelem_valid_key_end(const struct nft_set *set, struct nlattr **nla, u32 flags) { if ((set->flags & 
(NFT_SET_CONCAT | NFT_SET_INTERVAL)) == (NFT_SET_CONCAT | NFT_SET_INTERVAL)) { if (flags & NFT_SET_ELEM_INTERVAL_END) return false; if (nla[NFTA_SET_ELEM_KEY_END] && flags & NFT_SET_ELEM_CATCHALL) return false; } else { if (nla[NFTA_SET_ELEM_KEY_END]) return false; } return true; } static u32 nft_set_maxsize(const struct nft_set *set) { u32 maxsize, delta; if (!set->size) return UINT_MAX; if (set->ops->adjust_maxsize) delta = set->ops->adjust_maxsize(set); else delta = 0; if (check_add_overflow(set->size, set->ndeact, &maxsize)) return UINT_MAX; if (check_add_overflow(maxsize, delta, &maxsize)) return UINT_MAX; return maxsize; } static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr, u32 nlmsg_flags) { struct nft_expr *expr_array[NFT_SET_EXPR_MAX] = {}; struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; u8 genmask = nft_genmask_next(ctx->net); u32 flags = 0, size = 0, num_exprs = 0; struct nft_set_ext_tmpl tmpl; struct nft_set_ext *ext, *ext2; struct nft_set_elem elem; struct nft_set_binding *binding; struct nft_elem_priv *elem_priv; struct nft_object *obj = NULL; struct nft_userdata *udata; struct nft_data_desc desc; enum nft_registers dreg; struct nft_trans *trans; u64 expiration; u64 timeout; int err, i; u8 ulen; err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr, nft_set_elem_policy, NULL); if (err < 0) return err; nft_set_ext_prepare(&tmpl); err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags); if (err < 0) return err; if (((flags & NFT_SET_ELEM_CATCHALL) && nla[NFTA_SET_ELEM_KEY]) || (!(flags & NFT_SET_ELEM_CATCHALL) && !nla[NFTA_SET_ELEM_KEY])) return -EINVAL; if (flags != 0) { err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); if (err < 0) return err; } if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && !(flags & NFT_SET_ELEM_INTERVAL_END)) return -EINVAL; } else { if (nla[NFTA_SET_ELEM_DATA] != NULL) return -EINVAL; } if (set->flags & NFT_SET_OBJECT) { if (!nla[NFTA_SET_ELEM_OBJREF] && !(flags & NFT_SET_ELEM_INTERVAL_END)) return -EINVAL; } else { if (nla[NFTA_SET_ELEM_OBJREF]) return -EINVAL; } if (!nft_setelem_valid_key_end(set, nla, flags)) return -EINVAL; if ((flags & NFT_SET_ELEM_INTERVAL_END) && (nla[NFTA_SET_ELEM_DATA] || nla[NFTA_SET_ELEM_OBJREF] || nla[NFTA_SET_ELEM_TIMEOUT] || nla[NFTA_SET_ELEM_EXPIRATION] || nla[NFTA_SET_ELEM_USERDATA] || nla[NFTA_SET_ELEM_EXPR] || nla[NFTA_SET_ELEM_KEY_END] || nla[NFTA_SET_ELEM_EXPRESSIONS])) return -EINVAL; timeout = 0; if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_TIMEOUT], &timeout); if (err) return err; } else if (set->flags & NFT_SET_TIMEOUT && !(flags & NFT_SET_ELEM_INTERVAL_END)) { timeout = set->timeout; } expiration = 0; if (nla[NFTA_SET_ELEM_EXPIRATION] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; if (timeout == 0) return -EOPNOTSUPP; err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_EXPIRATION], &expiration); if (err) return err; if (expiration > timeout) return -ERANGE; } if (nla[NFTA_SET_ELEM_EXPR]) { struct nft_expr *expr; if (set->num_exprs && set->num_exprs != 1) return -EOPNOTSUPP; expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_ELEM_EXPR]); if (IS_ERR(expr)) return PTR_ERR(expr); expr_array[0] = expr; num_exprs = 1; if (set->num_exprs && set->exprs[0]->ops != expr->ops) { err = -EOPNOTSUPP; goto err_set_elem_expr; } } else if (nla[NFTA_SET_ELEM_EXPRESSIONS]) { struct nft_expr *expr; struct nlattr *tmp; int left; i = 0; 
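	/* Descriptive note (added): walk the NFTA_SET_ELEM_EXPRESSIONS list,
	 * accepting at most NFT_SET_EXPR_MAX expressions and requiring each
	 * one to use the same ops as the corresponding set-level expression
	 * whenever the set declares expressions of its own.
	 */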
nla_for_each_nested(tmp, nla[NFTA_SET_ELEM_EXPRESSIONS], left) { if (i == NFT_SET_EXPR_MAX || (set->num_exprs && set->num_exprs == i)) { err = -E2BIG; goto err_set_elem_expr; } if (nla_type(tmp) != NFTA_LIST_ELEM) { err = -EINVAL; goto err_set_elem_expr; } expr = nft_set_elem_expr_alloc(ctx, set, tmp); if (IS_ERR(expr)) { err = PTR_ERR(expr); goto err_set_elem_expr; } expr_array[i] = expr; num_exprs++; if (set->num_exprs && expr->ops != set->exprs[i]->ops) { err = -EOPNOTSUPP; goto err_set_elem_expr; } i++; } if (set->num_exprs && set->num_exprs != i) { err = -EOPNOTSUPP; goto err_set_elem_expr; } } else if (set->num_exprs > 0 && !(flags & NFT_SET_ELEM_INTERVAL_END)) { err = nft_set_elem_expr_clone(ctx, set, expr_array); if (err < 0) goto err_set_elem_expr_clone; num_exprs = set->num_exprs; } if (nla[NFTA_SET_ELEM_KEY]) { err = nft_setelem_parse_key(ctx, set, &elem.key.val, nla[NFTA_SET_ELEM_KEY]); if (err < 0) goto err_set_elem_expr; err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); if (err < 0) goto err_parse_key; } if (nla[NFTA_SET_ELEM_KEY_END]) { err = nft_setelem_parse_key(ctx, set, &elem.key_end.val, nla[NFTA_SET_ELEM_KEY_END]); if (err < 0) goto err_parse_key; err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); if (err < 0) goto err_parse_key_end; } if (set->flags & NFT_SET_TIMEOUT) { err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); if (err < 0) goto err_parse_key_end; } if (num_exprs) { for (i = 0; i < num_exprs; i++) size += expr_array[i]->ops->size; err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS, sizeof(struct nft_set_elem_expr) + size); if (err < 0) goto err_parse_key_end; } if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { obj = nft_obj_lookup(ctx->net, ctx->table, nla[NFTA_SET_ELEM_OBJREF], set->objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); obj = NULL; goto err_parse_key_end; } if (!nft_use_inc(&obj->use)) { err = -EMFILE; obj = NULL; goto err_parse_key_end; } err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); if (err < 0) goto err_parse_key_end; } if (nla[NFTA_SET_ELEM_DATA] != NULL) { err = nft_setelem_parse_data(ctx, set, &desc, &elem.data.val, nla[NFTA_SET_ELEM_DATA]); if (err < 0) goto err_parse_key_end; dreg = nft_type_to_reg(set->dtype); list_for_each_entry(binding, &set->bindings, list) { struct nft_ctx bind_ctx = { .net = ctx->net, .family = ctx->family, .table = ctx->table, .chain = (struct nft_chain *)binding->chain, }; if (!(binding->flags & NFT_SET_MAP)) continue; err = nft_validate_register_store(&bind_ctx, dreg, &elem.data.val, desc.type, desc.len); if (err < 0) goto err_parse_data; if (desc.type == NFT_DATA_VERDICT && (elem.data.val.verdict.code == NFT_GOTO || elem.data.val.verdict.code == NFT_JUMP)) nft_validate_state_update(ctx->table, NFT_VALIDATE_NEED); } err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); if (err < 0) goto err_parse_data; } /* The full maximum length of userdata can exceed the maximum * offset value (U8_MAX) for following extensions, therefor it * must be the last extension added. 
*/ ulen = 0; if (nla[NFTA_SET_ELEM_USERDATA] != NULL) { ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]); if (ulen > 0) { err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, ulen); if (err < 0) goto err_parse_data; } } elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, elem.key_end.val.data, elem.data.val.data, timeout, expiration, GFP_KERNEL_ACCOUNT); if (IS_ERR(elem.priv)) { err = PTR_ERR(elem.priv); goto err_parse_data; } ext = nft_set_elem_ext(set, elem.priv); if (flags) *nft_set_ext_flags(ext) = flags; if (obj) *nft_set_ext_obj(ext) = obj; if (ulen > 0) { if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) { err = -EINVAL; goto err_elem_free; } udata = nft_set_ext_userdata(ext); udata->len = ulen - 1; nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } err = nft_set_elem_expr_setup(ctx, &tmpl, ext, expr_array, num_exprs); if (err < 0) goto err_elem_free; trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) { err = -ENOMEM; goto err_elem_free; } ext->genmask = nft_genmask_cur(ctx->net); err = nft_setelem_insert(ctx->net, set, &elem, &elem_priv, flags); if (err) { if (err == -EEXIST) { ext2 = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) || nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^ nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) goto err_element_clash; if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && memcmp(nft_set_ext_data(ext), nft_set_ext_data(ext2), set->dlen) != 0) || (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF) && *nft_set_ext_obj(ext) != *nft_set_ext_obj(ext2))) goto err_element_clash; else if (!(nlmsg_flags & NLM_F_EXCL)) { err = 0; if (nft_set_ext_exists(ext2, NFT_SET_EXT_TIMEOUT)) { struct nft_elem_update update = { }; if (timeout != nft_set_ext_timeout(ext2)->timeout) { update.timeout = timeout; if (expiration == 0) expiration = timeout; update.flags |= NFT_TRANS_UPD_TIMEOUT; } if (expiration) { update.expiration = expiration; update.flags |= NFT_TRANS_UPD_EXPIRATION; } if (update.flags) { struct nft_trans_one_elem *ue; ue = &nft_trans_container_elem(trans)->elems[0]; ue->update = kmemdup(&update, sizeof(update), GFP_KERNEL); if (!ue->update) { err = -ENOMEM; goto err_element_clash; } ue->priv = elem_priv; nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); goto err_elem_free; } } } } else if (err == -ENOTEMPTY) { /* ENOTEMPTY reports overlapping between this element * and an existing one. 
			 */
			err = -EEXIST;
		}
		goto err_element_clash;
	}

	if (!(flags & NFT_SET_ELEM_CATCHALL)) {
		unsigned int max = nft_set_maxsize(set);

		if (!atomic_add_unless(&set->nelems, 1, max)) {
			err = -ENFILE;
			goto err_set_full;
		}
	}

	nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
	nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
	return 0;

err_set_full:
	nft_setelem_remove(ctx->net, set, elem.priv);
err_element_clash:
	kfree(trans);
err_elem_free:
	nf_tables_set_elem_destroy(ctx, set, elem.priv);
err_parse_data:
	if (nla[NFTA_SET_ELEM_DATA] != NULL)
		nft_data_release(&elem.data.val, desc.type);
err_parse_key_end:
	if (obj)
		nft_use_dec_restore(&obj->use);
	nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
err_parse_key:
	nft_data_release(&elem.key.val, NFT_DATA_VALUE);
err_set_elem_expr:
	for (i = 0; i < num_exprs && expr_array[i]; i++)
		nft_expr_destroy(ctx, expr_array[i]);
err_set_elem_expr_clone:
	return err;
}

static int nf_tables_newsetelem(struct sk_buff *skb,
				const struct nfnl_info *info,
				const struct nlattr * const nla[])
{
	struct netlink_ext_ack *extack = info->extack;
	u8 genmask = nft_genmask_next(info->net);
	u8 family = info->nfmsg->nfgen_family;
	struct net *net = info->net;
	const struct nlattr *attr;
	struct nft_table *table;
	struct nft_set *set;
	struct nft_ctx ctx;
	int rem, err;

	if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
		return -EINVAL;

	table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
				 genmask, NETLINK_CB(skb).portid);
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
		return PTR_ERR(table);
	}

	set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET],
				    nla[NFTA_SET_ELEM_LIST_SET_ID], genmask);
	if (IS_ERR(set)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
		return PTR_ERR(set);
	}

	if (!list_empty(&set->bindings) &&
	    (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
		return -EBUSY;

	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);

	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
		err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags);
		if (err < 0) {
			NL_SET_BAD_ATTR(extack, attr);
			return err;
		}
	}

	if (table->validate_state == NFT_VALIDATE_DO)
		return nft_table_validate(net, table);

	return 0;
}

/**
 * nft_data_hold - hold a nft_data item
 *
 * @data: struct nft_data to hold
 * @type: type of data
 *
 * Hold a nft_data item. NFT_DATA_VALUE types can be silently discarded,
 * NFT_DATA_VERDICT bumps the reference to chains in case of NFT_JUMP and
 * NFT_GOTO verdicts. This function must be called on active data objects
 * from the second phase of the commit protocol.
*/ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { struct nft_chain *chain; if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: chain = data->verdict.chain; nft_use_inc_restore(&chain->use); break; } } } static int nft_setelem_active_next(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); u8 genmask = nft_genmask_next(net); return nft_set_elem_active(ext, genmask); } static void nft_setelem_data_activate(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_hold(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); } void nft_setelem_data_deactivate(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_release(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) nft_use_dec(&(*nft_set_ext_obj(ext))->use); } /* similar to nft_trans_elems_remove, but called from abort path to undo newsetelem. * No notifications and no ndeact changes. * * Returns true if set had been added to (i.e., elements need to be removed again). */ static bool nft_trans_elems_new_abort(const struct nft_ctx *ctx, struct nft_trans_elem *te) { bool removed = false; int i; for (i = 0; i < te->nelems; i++) { if (te->elems[i].update) { kfree(te->elems[i].update); te->elems[i].update = NULL; /* Update request, so do not release this element */ te->elems[i].priv = NULL; continue; } if (!te->set->ops->abort || nft_setelem_is_catchall(te->set, te->elems[i].priv)) nft_setelem_remove(ctx->net, te->set, te->elems[i].priv); if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) atomic_dec(&te->set->nelems); removed = true; } return removed; } /* Called from abort path to undo DELSETELEM/DESTROYSETELEM. 
*/ static void nft_trans_elems_destroy_abort(const struct nft_ctx *ctx, const struct nft_trans_elem *te) { int i; for (i = 0; i < te->nelems; i++) { if (!nft_setelem_active_next(ctx->net, te->set, te->elems[i].priv)) { nft_setelem_data_activate(ctx->net, te->set, te->elems[i].priv); nft_setelem_activate(ctx->net, te->set, te->elems[i].priv); } if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) te->set->ndeact--; } } static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_set_ext_tmpl tmpl; struct nft_set_elem elem; struct nft_set_ext *ext; struct nft_trans *trans; u32 flags = 0; int err; err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr, nft_set_elem_policy, NULL); if (err < 0) return err; err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags); if (err < 0) return err; if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL)) return -EINVAL; if (!nft_setelem_valid_key_end(set, nla, flags)) return -EINVAL; nft_set_ext_prepare(&tmpl); if (flags != 0) { err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); if (err < 0) return err; } if (nla[NFTA_SET_ELEM_KEY]) { err = nft_setelem_parse_key(ctx, set, &elem.key.val, nla[NFTA_SET_ELEM_KEY]); if (err < 0) return err; err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); if (err < 0) goto fail_elem; } if (nla[NFTA_SET_ELEM_KEY_END]) { err = nft_setelem_parse_key(ctx, set, &elem.key_end.val, nla[NFTA_SET_ELEM_KEY_END]); if (err < 0) goto fail_elem; err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); if (err < 0) goto fail_elem_key_end; } err = -ENOMEM; elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, elem.key_end.val.data, NULL, 0, 0, GFP_KERNEL_ACCOUNT); if (IS_ERR(elem.priv)) { err = PTR_ERR(elem.priv); goto fail_elem_key_end; } ext = nft_set_elem_ext(set, elem.priv); if (flags) *nft_set_ext_flags(ext) = flags; trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); if (trans == NULL) goto fail_trans; err = nft_setelem_deactivate(ctx->net, set, &elem, flags); if (err < 0) goto fail_ops; nft_setelem_data_deactivate(ctx->net, set, elem.priv); nft_trans_container_elem(trans)->elems[0].priv = elem.priv; nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); return 0; fail_ops: kfree(trans); fail_trans: kfree(elem.priv); fail_elem_key_end: nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); fail_elem: nft_data_release(&elem.key.val, NFT_DATA_VALUE); return err; } static int nft_setelem_flush(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); struct nft_trans *trans; if (!nft_set_elem_active(ext, iter->genmask)) return 0; trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, struct_size_t(struct nft_trans_elem, elems, 1), GFP_ATOMIC); if (!trans) return -ENOMEM; set->ops->flush(ctx->net, set, elem_priv); set->ndeact++; nft_setelem_data_deactivate(ctx->net, set, elem_priv); nft_trans_elem_set(trans) = set; nft_trans_container_elem(trans)->nelems = 1; nft_trans_container_elem(trans)->elems[0].priv = elem_priv; nft_trans_commit_list_add_elem(ctx->net, trans, GFP_ATOMIC); return 0; } static int __nft_set_catchall_flush(const struct nft_ctx *ctx, struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_trans *trans; trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); if (!trans) return -ENOMEM; nft_setelem_data_deactivate(ctx->net, 
set, elem_priv); nft_trans_container_elem(trans)->elems[0].priv = elem_priv; nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); return 0; } static int nft_set_catchall_flush(const struct nft_ctx *ctx, struct nft_set *set) { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; struct nft_set_ext *ext; int ret = 0; list_for_each_entry_rcu(catchall, &set->catchall_list, list, lockdep_commit_lock_is_held(ctx->net)) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_active(ext, genmask)) continue; ret = __nft_set_catchall_flush(ctx, set, catchall->elem); if (ret < 0) break; nft_set_elem_change_active(ctx->net, set, ext); } return ret; } static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask) { struct nft_set_iter iter = { .genmask = genmask, .type = NFT_ITER_UPDATE, .fn = nft_setelem_flush, }; set->ops->walk(ctx, set, &iter); if (!iter.err) iter.err = nft_set_catchall_flush(ctx, set); return iter.err; } static int nf_tables_delsetelem(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; int rem, err = 0; table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, genmask, NETLINK_CB(skb).portid); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); return PTR_ERR(table); } set = nft_set_lookup(net, table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]); return PTR_ERR(set); } if (nft_set_is_anonymous(set)) return -EOPNOTSUPP; if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT)) return -EBUSY; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) return nft_set_flush(&ctx, set, genmask); nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_del_setelem(&ctx, set, attr); if (err == -ENOENT && NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYSETELEM) continue; if (err < 0) { NL_SET_BAD_ATTR(extack, attr); return err; } } return 0; } /* * Stateful objects */ /** * nft_register_obj- register nf_tables stateful object type * @obj_type: object type * * Registers the object type for use with nf_tables. Returns zero on * success or a negative errno code otherwise. */ int nft_register_obj(struct nft_object_type *obj_type) { if (obj_type->type == NFT_OBJECT_UNSPEC) return -EINVAL; nfnl_lock(NFNL_SUBSYS_NFTABLES); list_add_rcu(&obj_type->list, &nf_tables_objects); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } EXPORT_SYMBOL_GPL(nft_register_obj); /** * nft_unregister_obj - unregister nf_tables object type * @obj_type: object type * * Unregisters the object type for use with nf_tables. 
*/ void nft_unregister_obj(struct nft_object_type *obj_type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); list_del_rcu(&obj_type->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_obj); struct nft_object *nft_obj_lookup(const struct net *net, const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask) { struct nft_object_hash_key k = { .table = table }; char search[NFT_OBJ_MAXNAMELEN]; struct rhlist_head *tmp, *list; struct nft_object *obj; nla_strscpy(search, nla, sizeof(search)); k.name = search; WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_commit_lock_is_held(net)); rcu_read_lock(); list = rhltable_lookup(&nft_objname_ht, &k, nft_objname_ht_params); if (!list) goto out; rhl_for_each_entry_rcu(obj, tmp, list, rhlhead) { if (objtype == obj->ops->type->type && nft_active_genmask(obj, genmask)) { rcu_read_unlock(); return obj; } } out: rcu_read_unlock(); return ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(nft_obj_lookup); static struct nft_object *nft_obj_lookup_byhandle(const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask) { struct nft_object *obj; list_for_each_entry(obj, &table->objects, list) { if (be64_to_cpu(nla_get_be64(nla)) == obj->handle && objtype == obj->ops->type->type && nft_active_genmask(obj, genmask)) return obj; } return ERR_PTR(-ENOENT); } static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { [NFTA_OBJ_TABLE] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_OBJ_NAME] = { .type = NLA_STRING, .len = NFT_OBJ_MAXNAMELEN - 1 }, [NFTA_OBJ_TYPE] = { .type = NLA_U32 }, [NFTA_OBJ_DATA] = { .type = NLA_NESTED }, [NFTA_OBJ_HANDLE] = { .type = NLA_U64}, [NFTA_OBJ_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, }; static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, const struct nft_object_type *type, const struct nlattr *attr) { struct nlattr **tb; const struct nft_object_ops *ops; struct nft_object *obj; int err = -ENOMEM; tb = kmalloc_array(type->maxattr + 1, sizeof(*tb), GFP_KERNEL); if (!tb) goto err1; if (attr) { err = nla_parse_nested_deprecated(tb, type->maxattr, attr, type->policy, NULL); if (err < 0) goto err2; } else { memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1)); } if (type->select_ops) { ops = type->select_ops(ctx, (const struct nlattr * const *)tb); if (IS_ERR(ops)) { err = PTR_ERR(ops); goto err2; } } else { ops = type->ops; } err = -ENOMEM; obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL_ACCOUNT); if (!obj) goto err2; err = ops->init(ctx, (const struct nlattr * const *)tb, obj); if (err < 0) goto err3; obj->ops = ops; kfree(tb); return obj; err3: kfree(obj); err2: kfree(tb); err1: return ERR_PTR(err); } static int nft_object_dump(struct sk_buff *skb, unsigned int attr, struct nft_object *obj, bool reset) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, attr); if (!nest) goto nla_put_failure; if (obj->ops->dump(skb, obj, reset) < 0) goto nla_put_failure; nla_nest_end(skb, nest); return 0; nla_put_failure: return -1; } static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) { const struct nft_object_type *type; list_for_each_entry_rcu(type, &nf_tables_objects, list) { if (type->family != NFPROTO_UNSPEC && type->family != family) continue; if (objtype == type->type) return type; } return NULL; } static const struct nft_object_type * nft_obj_type_get(struct net *net, u32 objtype, u8 family) { const struct nft_object_type *type; rcu_read_lock(); type = __nft_obj_type_get(objtype, family); if (type != NULL && 
try_module_get(type->owner)) { rcu_read_unlock(); return type; } rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES if (type == NULL) { if (nft_request_module(net, "nft-obj-%u", objtype) == -EAGAIN) return ERR_PTR(-EAGAIN); } #endif return ERR_PTR(-ENOENT); } static int nf_tables_updobj(const struct nft_ctx *ctx, const struct nft_object_type *type, const struct nlattr *attr, struct nft_object *obj) { struct nft_object *newobj; struct nft_trans *trans; int err = -ENOMEM; /* caller must have obtained type->owner reference. */ trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ, sizeof(struct nft_trans_obj)); if (!trans) goto err_trans; newobj = nft_obj_init(ctx, type, attr); if (IS_ERR(newobj)) { err = PTR_ERR(newobj); goto err_free_trans; } nft_trans_obj(trans) = obj; nft_trans_obj_update(trans) = true; nft_trans_obj_newobj(trans) = newobj; nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err_free_trans: kfree(trans); err_trans: module_put(type->owner); return err; } static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; const struct nft_object_type *type; struct net *net = info->net; struct nft_table *table; struct nft_object *obj; struct nft_ctx ctx; u32 objtype; int err; if (!nla[NFTA_OBJ_TYPE] || !nla[NFTA_OBJ_NAME] || !nla[NFTA_OBJ_DATA]) return -EINVAL; table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, NETLINK_CB(skb).portid); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]); return PTR_ERR(table); } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); if (err != -ENOENT) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); return err; } } else { if (info->nlh->nlmsg_flags & NLM_F_EXCL) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); return -EEXIST; } if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; if (!obj->ops->update) return 0; type = nft_obj_type_get(net, objtype, family); if (WARN_ON_ONCE(IS_ERR(type))) return PTR_ERR(type); nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); /* type->owner reference is put when transaction object is released. 
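 * On success the reference travels with the NEWOBJ transaction and is dropped
 * via nft_obj_destroy() of the duplicate object in nft_obj_commit_update();
 * on error nf_tables_updobj() puts it back directly.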
*/ return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj); } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); if (!nft_use_inc(&table->use)) return -EMFILE; type = nft_obj_type_get(net, objtype, family); if (IS_ERR(type)) { err = PTR_ERR(type); goto err_type; } obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_init; } obj->key.table = table; obj->handle = nf_tables_alloc_handle(table); obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL_ACCOUNT); if (!obj->key.name) { err = -ENOMEM; goto err_strdup; } if (nla[NFTA_OBJ_USERDATA]) { obj->udata = nla_memdup(nla[NFTA_OBJ_USERDATA], GFP_KERNEL_ACCOUNT); if (obj->udata == NULL) goto err_userdata; obj->udlen = nla_len(nla[NFTA_OBJ_USERDATA]); } err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj); if (err < 0) goto err_trans; err = rhltable_insert(&nft_objname_ht, &obj->rhlhead, nft_objname_ht_params); if (err < 0) goto err_obj_ht; list_add_tail_rcu(&obj->list, &table->objects); return 0; err_obj_ht: /* queued in transaction log */ INIT_LIST_HEAD(&obj->list); return err; err_trans: kfree(obj->udata); err_userdata: kfree(obj->key.name); err_strdup: if (obj->ops->destroy) obj->ops->destroy(&ctx, obj); kfree(obj); err_init: module_put(type->owner); err_type: nft_use_dec_restore(&table->use); return err; } static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, const struct nft_table *table, struct nft_object *obj, bool reset) { struct nlmsghdr *nlh; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) || nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle), NFTA_OBJ_PAD)) goto nla_put_failure; if (event == NFT_MSG_DELOBJ) { nlmsg_end(skb, nlh); return 0; } if (nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset)) goto nla_put_failure; if (obj->udata && nla_put(skb, NFTA_OBJ_USERDATA, obj->udlen, obj->udata)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_trim(skb, nlh); return -1; } static void audit_log_obj_reset(const struct nft_table *table, unsigned int base_seq, unsigned int nentries) { char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq); audit_log_nfcfg(buf, table->family, nentries, AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC); kfree(buf); } struct nft_obj_dump_ctx { unsigned int s_idx; char *table; u32 type; bool reset; }; static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nftables_pernet *nft_net; const struct nft_table *table; unsigned int entries = 0; struct nft_object *obj; unsigned int idx = 0; int rc = 0; rcu_read_lock(); nft_net = nft_pernet(net); cb->seq = READ_ONCE(nft_net->base_seq); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; entries = 0; list_for_each_entry_rcu(obj, &table->objects, list) { if (!nft_is_active(net, obj)) goto cont; if (idx < ctx->s_idx) goto cont; if (ctx->table && strcmp(ctx->table, table->name)) goto cont; 
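/* skip objects that do not match an explicit NFTA_OBJ_TYPE filter */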
if (ctx->type != NFT_OBJECT_UNSPEC && obj->ops->type->type != ctx->type) goto cont; rc = nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, NLM_F_MULTI | NLM_F_APPEND, table->family, table, obj, ctx->reset); if (rc < 0) break; entries++; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } if (ctx->reset && entries) audit_log_obj_reset(table, nft_net->base_seq, entries); if (rc < 0) break; } rcu_read_unlock(); ctx->s_idx = idx; return skb->len; } static int nf_tables_dumpreset_obj(struct sk_buff *skb, struct netlink_callback *cb) { struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); int ret; mutex_lock(&nft_net->commit_mutex); ret = nf_tables_dump_obj(skb, cb); mutex_unlock(&nft_net->commit_mutex); return ret; } static int nf_tables_dump_obj_start(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; const struct nlattr * const *nla = cb->data; BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); if (nla[NFTA_OBJ_TABLE]) { ctx->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); if (!ctx->table) return -ENOMEM; } if (nla[NFTA_OBJ_TYPE]) ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); return 0; } static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; ctx->reset = true; return nf_tables_dump_obj_start(cb); } static int nf_tables_dump_obj_done(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; kfree(ctx->table); return 0; } /* Caller must hold rcu read lock or transaction mutex */ static struct sk_buff * nf_tables_getobj_single(u32 portid, const struct nfnl_info *info, const struct nlattr * const nla[], bool reset) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; const struct nft_table *table; struct net *net = info->net; struct nft_object *obj; struct sk_buff *skb2; u32 objtype; int err; if (!nla[NFTA_OBJ_NAME] || !nla[NFTA_OBJ_TYPE]) return ERR_PTR(-EINVAL); table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]); return ERR_CAST(table); } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); return ERR_CAST(obj); } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) return ERR_PTR(-ENOMEM); err = nf_tables_fill_obj_info(skb2, net, portid, info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, family, table, obj, reset); if (err < 0) { kfree_skb(skb2); return ERR_PTR(err); } return skb2; } static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { u32 portid = NETLINK_CB(skb).portid; struct sk_buff *skb2; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = nf_tables_dump_obj_start, .dump = nf_tables_dump_obj, .done = nf_tables_dump_obj_done, .module = THIS_MODULE, .data = (void *)nla, }; return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } skb2 = nf_tables_getobj_single(portid, info, nla, false); if (IS_ERR(skb2)) return PTR_ERR(skb2); return nfnetlink_unicast(skb2, info->net, portid); } static int nf_tables_getobj_reset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct nftables_pernet *nft_net = nft_pernet(info->net); u32 portid = NETLINK_CB(skb).portid; struct net *net = 
info->net; struct sk_buff *skb2; char *buf; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = nf_tables_dumpreset_obj_start, .dump = nf_tables_dumpreset_obj, .done = nf_tables_dump_obj_done, .module = THIS_MODULE, .data = (void *)nla, }; return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } if (!try_module_get(THIS_MODULE)) return -EINVAL; rcu_read_unlock(); mutex_lock(&nft_net->commit_mutex); skb2 = nf_tables_getobj_single(portid, info, nla, true); mutex_unlock(&nft_net->commit_mutex); rcu_read_lock(); module_put(THIS_MODULE); if (IS_ERR(skb2)) return PTR_ERR(skb2); buf = kasprintf(GFP_ATOMIC, "%.*s:%u", nla_len(nla[NFTA_OBJ_TABLE]), (char *)nla_data(nla[NFTA_OBJ_TABLE]), nft_net->base_seq); audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC); kfree(buf); return nfnetlink_unicast(skb2, net, portid); } static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) { if (obj->ops->destroy) obj->ops->destroy(ctx, obj); module_put(obj->ops->type->owner); kfree(obj->key.name); kfree(obj->udata); kfree(obj); } static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; struct nft_object *obj; struct nft_ctx ctx; u32 objtype; if (!nla[NFTA_OBJ_TYPE] || (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE])) return -EINVAL; table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, NETLINK_CB(skb).portid); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]); return PTR_ERR(table); } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); if (nla[NFTA_OBJ_HANDLE]) { attr = nla[NFTA_OBJ_HANDLE]; obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask); } else { attr = nla[NFTA_OBJ_NAME]; obj = nft_obj_lookup(net, table, attr, objtype, genmask); } if (IS_ERR(obj)) { if (PTR_ERR(obj) == -ENOENT && NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYOBJ) return 0; NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(obj); } if (obj->use > 0) { NL_SET_BAD_ATTR(extack, attr); return -EBUSY; } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); return nft_delobj(&ctx, obj); } static void __nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, u16 flags, int family, int report, gfp_t gfp) { struct nftables_pernet *nft_net = nft_pernet(net); struct sk_buff *skb; int err; if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) return; skb = nlmsg_new(NLMSG_GOODSIZE, gfp); if (skb == NULL) goto err; err = nf_tables_fill_obj_info(skb, net, portid, seq, event, flags & (NLM_F_CREATE | NLM_F_EXCL), family, table, obj, false); if (err < 0) { kfree_skb(skb); goto err; } nft_notify_enqueue(skb, report, &nft_net->notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, u16 flags, int family, int report, gfp_t gfp) { struct nftables_pernet *nft_net = nft_pernet(net); char *buf = kasprintf(gfp, "%s:%u", table->name, nft_net->base_seq); audit_log_nfcfg(buf, family, obj->handle, event == NFT_MSG_NEWOBJ ? 
AUDIT_NFT_OP_OBJ_REGISTER : AUDIT_NFT_OP_OBJ_UNREGISTER, gfp); kfree(buf); __nft_obj_notify(net, table, obj, portid, seq, event, flags, family, report, gfp); } EXPORT_SYMBOL_GPL(nft_obj_notify); static void nf_tables_obj_notify(const struct nft_ctx *ctx, struct nft_object *obj, int event) { __nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event, ctx->flags, ctx->family, ctx->report, GFP_KERNEL); } /* * Flow tables */ void nft_register_flowtable_type(struct nf_flowtable_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); list_add_tail_rcu(&type->list, &nf_tables_flowtables); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_register_flowtable_type); void nft_unregister_flowtable_type(struct nf_flowtable_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); list_del_rcu(&type->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_flowtable_type); static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = { [NFTA_FLOWTABLE_TABLE] = { .type = NLA_STRING, .len = NFT_NAME_MAXLEN - 1 }, [NFTA_FLOWTABLE_NAME] = { .type = NLA_STRING, .len = NFT_NAME_MAXLEN - 1 }, [NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED }, [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 }, [NFTA_FLOWTABLE_FLAGS] = { .type = NLA_U32 }, }; struct nft_flowtable *nft_flowtable_lookup(const struct net *net, const struct nft_table *table, const struct nlattr *nla, u8 genmask) { struct nft_flowtable *flowtable; list_for_each_entry_rcu(flowtable, &table->flowtables, list, lockdep_commit_lock_is_held(net)) { if (!nla_strcmp(nla, flowtable->name) && nft_active_genmask(flowtable, genmask)) return flowtable; } return ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(nft_flowtable_lookup); void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, struct nft_flowtable *flowtable, enum nft_trans_phase phase) { switch (phase) { case NFT_TRANS_PREPARE_ERROR: case NFT_TRANS_PREPARE: case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: nft_use_dec(&flowtable->use); fallthrough; default: return; } } EXPORT_SYMBOL_GPL(nf_tables_deactivate_flowtable); static struct nft_flowtable * nft_flowtable_lookup_byhandle(const struct nft_table *table, const struct nlattr *nla, u8 genmask) { struct nft_flowtable *flowtable; list_for_each_entry(flowtable, &table->flowtables, list) { if (be64_to_cpu(nla_get_be64(nla)) == flowtable->handle && nft_active_genmask(flowtable, genmask)) return flowtable; } return ERR_PTR(-ENOENT); } struct nft_flowtable_hook { u32 num; int priority; struct list_head list; }; static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = { [NFTA_FLOWTABLE_HOOK_NUM] = { .type = NLA_U32 }, [NFTA_FLOWTABLE_HOOK_PRIORITY] = { .type = NLA_U32 }, [NFTA_FLOWTABLE_HOOK_DEVS] = { .type = NLA_NESTED }, }; static int nft_flowtable_parse_hook(const struct nft_ctx *ctx, const struct nlattr * const nla[], struct nft_flowtable_hook *flowtable_hook, struct nft_flowtable *flowtable, struct netlink_ext_ack *extack, bool add) { struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1]; struct nf_hook_ops *ops; struct nft_hook *hook; int hooknum, priority; int err; INIT_LIST_HEAD(&flowtable_hook->list); err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, nla[NFTA_FLOWTABLE_HOOK], nft_flowtable_hook_policy, NULL); if (err < 0) return err; if (add) { if (!tb[NFTA_FLOWTABLE_HOOK_NUM] || !tb[NFTA_FLOWTABLE_HOOK_PRIORITY]) { NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]); return -ENOENT; } hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM])); if (hooknum != NF_NETDEV_INGRESS) return 
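/* flowtables can only be attached to the netdev ingress hook */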
-EOPNOTSUPP; priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY])); flowtable_hook->priority = priority; flowtable_hook->num = hooknum; } else { if (tb[NFTA_FLOWTABLE_HOOK_NUM]) { hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM])); if (hooknum != flowtable->hooknum) return -EOPNOTSUPP; } if (tb[NFTA_FLOWTABLE_HOOK_PRIORITY]) { priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY])); if (priority != flowtable->data.priority) return -EOPNOTSUPP; } flowtable_hook->priority = flowtable->data.priority; flowtable_hook->num = flowtable->hooknum; } if (tb[NFTA_FLOWTABLE_HOOK_DEVS]) { err = nf_tables_parse_netdev_hooks(ctx->net, tb[NFTA_FLOWTABLE_HOOK_DEVS], &flowtable_hook->list, extack); if (err < 0) return err; } list_for_each_entry(hook, &flowtable_hook->list, list) { list_for_each_entry(ops, &hook->ops_list, list) { ops->pf = NFPROTO_NETDEV; ops->hooknum = flowtable_hook->num; ops->priority = flowtable_hook->priority; ops->priv = &flowtable->data; ops->hook = flowtable->data.type->hook; } } return err; } /* call under rcu_read_lock */ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family) { const struct nf_flowtable_type *type; list_for_each_entry_rcu(type, &nf_tables_flowtables, list) { if (family == type->family) return type; } return NULL; } static const struct nf_flowtable_type * nft_flowtable_type_get(struct net *net, u8 family) { const struct nf_flowtable_type *type; rcu_read_lock(); type = __nft_flowtable_type_get(family); if (type != NULL && try_module_get(type->owner)) { rcu_read_unlock(); return type; } rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES if (type == NULL) { if (nft_request_module(net, "nf-flowtable-%u", family) == -EAGAIN) return ERR_PTR(-EAGAIN); } #endif return ERR_PTR(-ENOENT); } /* Only called from error and netdev event paths. 
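 * The hook is removed from the data path first, then the flow block is
 * unbound again via the flowtable type's ->setup(..., FLOW_BLOCK_UNBIND).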
*/ static void nft_unregister_flowtable_ops(struct net *net, struct nft_flowtable *flowtable, struct nf_hook_ops *ops) { nf_unregister_net_hook(net, ops); flowtable->data.type->setup(&flowtable->data, ops->dev, FLOW_BLOCK_UNBIND); } static void __nft_unregister_flowtable_net_hooks(struct net *net, struct nft_flowtable *flowtable, struct list_head *hook_list, bool release_netdev) { struct nft_hook *hook, *next; struct nf_hook_ops *ops; list_for_each_entry_safe(hook, next, hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) nft_unregister_flowtable_ops(net, flowtable, ops); if (release_netdev) { list_del(&hook->list); nft_netdev_hook_free_rcu(hook); } } } static void nft_unregister_flowtable_net_hooks(struct net *net, struct nft_flowtable *flowtable, struct list_head *hook_list) { __nft_unregister_flowtable_net_hooks(net, flowtable, hook_list, false); } static int nft_register_flowtable_ops(struct net *net, struct nft_flowtable *flowtable, struct nf_hook_ops *ops) { int err; err = flowtable->data.type->setup(&flowtable->data, ops->dev, FLOW_BLOCK_BIND); if (err < 0) return err; err = nf_register_net_hook(net, ops); if (!err) return 0; flowtable->data.type->setup(&flowtable->data, ops->dev, FLOW_BLOCK_UNBIND); return err; } static int nft_register_flowtable_net_hooks(struct net *net, struct nft_table *table, struct list_head *hook_list, struct nft_flowtable *flowtable) { struct nft_hook *hook, *next; struct nft_flowtable *ft; struct nf_hook_ops *ops; int err, i = 0; list_for_each_entry(hook, hook_list, list) { list_for_each_entry(ft, &table->flowtables, list) { if (!nft_is_active_next(net, ft)) continue; if (nft_hook_list_find(&ft->hook_list, hook)) { err = -EEXIST; goto err_unregister_net_hooks; } } list_for_each_entry(ops, &hook->ops_list, list) { err = nft_register_flowtable_ops(net, flowtable, ops); if (err < 0) goto err_unregister_net_hooks; i++; } } return 0; err_unregister_net_hooks: list_for_each_entry_safe(hook, next, hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) { if (i-- <= 0) break; nft_unregister_flowtable_ops(net, flowtable, ops); } list_del_rcu(&hook->list); nft_netdev_hook_free_rcu(hook); } return err; } static void nft_hooks_destroy(struct list_head *hook_list) { struct nft_hook *hook, *next; list_for_each_entry_safe(hook, next, hook_list, list) { list_del_rcu(&hook->list); nft_netdev_hook_free_rcu(hook); } } static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, struct nft_flowtable *flowtable, struct netlink_ext_ack *extack) { const struct nlattr * const *nla = ctx->nla; struct nft_flowtable_hook flowtable_hook; struct nft_hook *hook, *next; struct nf_hook_ops *ops; struct nft_trans *trans; bool unregister = false; u32 flags; int err; err = nft_flowtable_parse_hook(ctx, nla, &flowtable_hook, flowtable, extack, false); if (err < 0) return err; list_for_each_entry_safe(hook, next, &flowtable_hook.list, list) { if (nft_hook_list_find(&flowtable->hook_list, hook)) { list_del(&hook->list); nft_netdev_hook_free(hook); } } if (nla[NFTA_FLOWTABLE_FLAGS]) { flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); if (flags & ~NFT_FLOWTABLE_MASK) { err = -EOPNOTSUPP; goto err_flowtable_update_hook; } if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^ (flags & NFT_FLOWTABLE_HW_OFFLOAD)) { err = -EOPNOTSUPP; goto err_flowtable_update_hook; } } else { flags = flowtable->data.flags; } err = nft_register_flowtable_net_hooks(ctx->net, ctx->table, &flowtable_hook.list, flowtable); if (err < 0) goto err_flowtable_update_hook; 
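	/* the new hooks are live in the data path from here on; any later
	 * failure must unregister them again (see err_flowtable_update_hook).
	 */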
trans = nft_trans_alloc(ctx, NFT_MSG_NEWFLOWTABLE, sizeof(struct nft_trans_flowtable)); if (!trans) { unregister = true; err = -ENOMEM; goto err_flowtable_update_hook; } nft_trans_flowtable_flags(trans) = flags; nft_trans_flowtable(trans) = flowtable; nft_trans_flowtable_update(trans) = true; INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); list_splice(&flowtable_hook.list, &nft_trans_flowtable_hooks(trans)); nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err_flowtable_update_hook: list_for_each_entry_safe(hook, next, &flowtable_hook.list, list) { if (unregister) { list_for_each_entry(ops, &hook->ops_list, list) nft_unregister_flowtable_ops(ctx->net, flowtable, ops); } list_del_rcu(&hook->list); nft_netdev_hook_free_rcu(hook); } return err; } static int nf_tables_newflowtable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; struct nft_flowtable_hook flowtable_hook; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; const struct nf_flowtable_type *type; struct nft_flowtable *flowtable; struct net *net = info->net; struct nft_table *table; struct nft_trans *trans; struct nft_ctx ctx; int err; if (!nla[NFTA_FLOWTABLE_TABLE] || !nla[NFTA_FLOWTABLE_NAME] || !nla[NFTA_FLOWTABLE_HOOK]) return -EINVAL; table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family, genmask, NETLINK_CB(skb).portid); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]); return PTR_ERR(table); } flowtable = nft_flowtable_lookup(net, table, nla[NFTA_FLOWTABLE_NAME], genmask); if (IS_ERR(flowtable)) { err = PTR_ERR(flowtable); if (err != -ENOENT) { NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]); return err; } } else { if (info->nlh->nlmsg_flags & NLM_F_EXCL) { NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]); return -EEXIST; } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); return nft_flowtable_update(&ctx, info->nlh, flowtable, extack); } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); if (!nft_use_inc(&table->use)) return -EMFILE; flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT); if (!flowtable) { err = -ENOMEM; goto flowtable_alloc; } flowtable->table = table; flowtable->handle = nf_tables_alloc_handle(table); INIT_LIST_HEAD(&flowtable->hook_list); flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL_ACCOUNT); if (!flowtable->name) { err = -ENOMEM; goto err1; } type = nft_flowtable_type_get(net, family); if (IS_ERR(type)) { err = PTR_ERR(type); goto err2; } if (nla[NFTA_FLOWTABLE_FLAGS]) { flowtable->data.flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK) { err = -EOPNOTSUPP; goto err3; } } write_pnet(&flowtable->data.net, net); flowtable->data.type = type; err = type->init(&flowtable->data); if (err < 0) goto err3; err = nft_flowtable_parse_hook(&ctx, nla, &flowtable_hook, flowtable, extack, true); if (err < 0) goto err_flowtable_parse_hooks; list_splice(&flowtable_hook.list, &flowtable->hook_list); flowtable->data.priority = flowtable_hook.priority; flowtable->hooknum = flowtable_hook.num; trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto err_flowtable_trans; } /* This must be LAST to ensure no packets are walking over this flowtable. 
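 * Packets can start hitting the hooks as soon as nf_register_net_hook()
 * succeeds, so the flowtable data and the transaction must be fully set up
 * before this point.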
*/ err = nft_register_flowtable_net_hooks(ctx.net, table, &flowtable->hook_list, flowtable); if (err < 0) goto err_flowtable_hooks; list_add_tail_rcu(&flowtable->list, &table->flowtables); return 0; err_flowtable_hooks: nft_trans_destroy(trans); err_flowtable_trans: nft_hooks_destroy(&flowtable->hook_list); err_flowtable_parse_hooks: flowtable->data.type->free(&flowtable->data); err3: module_put(type->owner); err2: kfree(flowtable->name); err1: kfree(flowtable); flowtable_alloc: nft_use_dec_restore(&table->use); return err; } static void nft_flowtable_hook_release(struct nft_flowtable_hook *flowtable_hook) { struct nft_hook *this, *next; list_for_each_entry_safe(this, next, &flowtable_hook->list, list) { list_del(&this->list); nft_netdev_hook_free(this); } } static int nft_delflowtable_hook(struct nft_ctx *ctx, struct nft_flowtable *flowtable, struct netlink_ext_ack *extack) { const struct nlattr * const *nla = ctx->nla; struct nft_flowtable_hook flowtable_hook; LIST_HEAD(flowtable_del_list); struct nft_hook *this, *hook; struct nft_trans *trans; int err; err = nft_flowtable_parse_hook(ctx, nla, &flowtable_hook, flowtable, extack, false); if (err < 0) return err; list_for_each_entry(this, &flowtable_hook.list, list) { hook = nft_hook_list_find(&flowtable->hook_list, this); if (!hook) { err = -ENOENT; goto err_flowtable_del_hook; } list_move(&hook->list, &flowtable_del_list); } trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE, sizeof(struct nft_trans_flowtable)); if (!trans) { err = -ENOMEM; goto err_flowtable_del_hook; } nft_trans_flowtable(trans) = flowtable; nft_trans_flowtable_update(trans) = true; INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); list_splice(&flowtable_del_list, &nft_trans_flowtable_hooks(trans)); nft_flowtable_hook_release(&flowtable_hook); nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err_flowtable_del_hook: list_splice(&flowtable_del_list, &flowtable->hook_list); nft_flowtable_hook_release(&flowtable_hook); return err; } static int nf_tables_delflowtable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; struct nft_flowtable *flowtable; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; struct nft_ctx ctx; if (!nla[NFTA_FLOWTABLE_TABLE] || (!nla[NFTA_FLOWTABLE_NAME] && !nla[NFTA_FLOWTABLE_HANDLE])) return -EINVAL; table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family, genmask, NETLINK_CB(skb).portid); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]); return PTR_ERR(table); } if (nla[NFTA_FLOWTABLE_HANDLE]) { attr = nla[NFTA_FLOWTABLE_HANDLE]; flowtable = nft_flowtable_lookup_byhandle(table, attr, genmask); } else { attr = nla[NFTA_FLOWTABLE_NAME]; flowtable = nft_flowtable_lookup(net, table, attr, genmask); } if (IS_ERR(flowtable)) { if (PTR_ERR(flowtable) == -ENOENT && NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYFLOWTABLE) return 0; NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(flowtable); } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); if (nla[NFTA_FLOWTABLE_HOOK]) return nft_delflowtable_hook(&ctx, flowtable, extack); if (flowtable->use > 0) { NL_SET_BAD_ATTR(extack, attr); return -EBUSY; } return nft_delflowtable(&ctx, flowtable); } static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, struct nft_flowtable 
*flowtable, struct list_head *hook_list) { struct nlattr *nest, *nest_devs; struct nft_hook *hook; struct nlmsghdr *nlh; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) || nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) || nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle), NFTA_FLOWTABLE_PAD)) goto nla_put_failure; if (event == NFT_MSG_DELFLOWTABLE && !hook_list) { nlmsg_end(skb, nlh); return 0; } if (nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) || nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags))) goto nla_put_failure; nest = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK); if (!nest) goto nla_put_failure; if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) || nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->data.priority))) goto nla_put_failure; nest_devs = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK_DEVS); if (!nest_devs) goto nla_put_failure; if (!hook_list) hook_list = &flowtable->hook_list; list_for_each_entry_rcu(hook, hook_list, list, lockdep_commit_lock_is_held(net)) { if (nla_put(skb, NFTA_DEVICE_NAME, hook->ifnamelen, hook->ifname)) goto nla_put_failure; } nla_nest_end(skb, nest_devs); nla_nest_end(skb, nest); nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_trim(skb, nlh); return -1; } struct nft_flowtable_filter { char *table; }; static int nf_tables_dump_flowtable(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct nft_flowtable_filter *filter = cb->data; unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nft_flowtable *flowtable; struct nftables_pernet *nft_net; const struct nft_table *table; rcu_read_lock(); nft_net = nft_pernet(net); cb->seq = READ_ONCE(nft_net->base_seq); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; list_for_each_entry_rcu(flowtable, &table->flowtables, list) { if (!nft_is_active(net, flowtable)) goto cont; if (idx < s_idx) goto cont; if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); if (filter && filter->table && strcmp(filter->table, table->name)) goto cont; if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWFLOWTABLE, NLM_F_MULTI | NLM_F_APPEND, table->family, flowtable, NULL) < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } } done: rcu_read_unlock(); cb->args[0] = idx; return skb->len; } static int nf_tables_dump_flowtable_start(struct netlink_callback *cb) { const struct nlattr * const *nla = cb->data; struct nft_flowtable_filter *filter = NULL; if (nla[NFTA_FLOWTABLE_TABLE]) { filter = kzalloc(sizeof(*filter), GFP_ATOMIC); if (!filter) return -ENOMEM; filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE], GFP_ATOMIC); if (!filter->table) { kfree(filter); return -ENOMEM; } } cb->data = filter; return 0; } static int nf_tables_dump_flowtable_done(struct netlink_callback *cb) { struct nft_flowtable_filter *filter = cb->data; if (!filter) return 0; kfree(filter->table); kfree(filter); return 0; } /* called with rcu_read_lock held */ static int nf_tables_getflowtable(struct sk_buff *skb, const struct nfnl_info *info, const struct 
nlattr * const nla[]) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; struct nft_flowtable *flowtable; const struct nft_table *table; struct net *net = info->net; struct sk_buff *skb2; int err; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = nf_tables_dump_flowtable_start, .dump = nf_tables_dump_flowtable, .done = nf_tables_dump_flowtable_done, .module = THIS_MODULE, .data = (void *)nla, }; return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } if (!nla[NFTA_FLOWTABLE_NAME]) return -EINVAL; table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family, genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]); return PTR_ERR(table); } flowtable = nft_flowtable_lookup(net, table, nla[NFTA_FLOWTABLE_NAME], genmask); if (IS_ERR(flowtable)) { NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]); return PTR_ERR(flowtable); } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) return -ENOMEM; err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWFLOWTABLE, 0, family, flowtable, NULL); if (err < 0) goto err_fill_flowtable_info; return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); err_fill_flowtable_info: kfree_skb(skb2); return err; } static void nf_tables_flowtable_notify(struct nft_ctx *ctx, struct nft_flowtable *flowtable, struct list_head *hook_list, int event) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); struct sk_buff *skb; u16 flags = 0; int err; if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, ctx->family, flowtable, hook_list); if (err < 0) { kfree_skb(skb); goto err; } nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) { struct nft_hook *hook, *next; flowtable->data.type->free(&flowtable->data); list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { list_del_rcu(&hook->list); nft_netdev_hook_free_rcu(hook); } kfree(flowtable->name); module_put(flowtable->data.type->owner); kfree(flowtable); } static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { struct nftables_pernet *nft_net = nft_pernet(net); struct nlmsghdr *nlh; char buf[TASK_COMM_LEN]; int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN); nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) || nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) || nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current))) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_trim(skb, nlh); return -EMSGSIZE; } struct nf_hook_ops *nft_hook_find_ops(const struct nft_hook *hook, const struct net_device *dev) { struct nf_hook_ops *ops; list_for_each_entry(ops, &hook->ops_list, list) { if (ops->dev == dev) return ops; } return NULL; } EXPORT_SYMBOL_GPL(nft_hook_find_ops); struct nf_hook_ops *nft_hook_find_ops_rcu(const struct nft_hook 
*hook, const struct net_device *dev) { struct nf_hook_ops *ops; list_for_each_entry_rcu(ops, &hook->ops_list, list) { if (ops->dev == dev) return ops; } return NULL; } EXPORT_SYMBOL_GPL(nft_hook_find_ops_rcu); static void nf_tables_device_notify(const struct nft_table *table, int attr, const char *name, const struct nft_hook *hook, const struct net_device *dev, int event) { struct net *net = dev_net(dev); struct nlmsghdr *nlh; struct sk_buff *skb; u16 flags = 0; if (!nfnetlink_has_listeners(net, NFNLGRP_NFT_DEV)) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) goto err; event = event == NETDEV_REGISTER ? NFT_MSG_NEWDEV : NFT_MSG_DELDEV; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nfnl_msg_put(skb, 0, 0, event, flags, table->family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto err; if (nla_put_string(skb, NFTA_DEVICE_TABLE, table->name) || nla_put_string(skb, attr, name) || nla_put(skb, NFTA_DEVICE_SPEC, hook->ifnamelen, hook->ifname) || nla_put_string(skb, NFTA_DEVICE_NAME, dev->name)) goto err; nlmsg_end(skb, nlh); nfnetlink_send(skb, net, 0, NFNLGRP_NFT_DEV, nlmsg_report(nlh), GFP_KERNEL); return; err: if (skb) kfree_skb(skb); nfnetlink_set_err(net, 0, NFNLGRP_NFT_DEV, -ENOBUFS); } void nf_tables_chain_device_notify(const struct nft_chain *chain, const struct nft_hook *hook, const struct net_device *dev, int event) { nf_tables_device_notify(chain->table, NFTA_DEVICE_CHAIN, chain->name, hook, dev, event); } static void nf_tables_flowtable_device_notify(const struct nft_flowtable *ft, const struct nft_hook *hook, const struct net_device *dev, int event) { nf_tables_device_notify(ft->table, NFTA_DEVICE_FLOWTABLE, ft->name, hook, dev, event); } static int nft_flowtable_event(unsigned long event, struct net_device *dev, struct nft_flowtable *flowtable, bool changename) { struct nf_hook_ops *ops; struct nft_hook *hook; bool match; list_for_each_entry(hook, &flowtable->hook_list, list) { ops = nft_hook_find_ops(hook, dev); match = !strncmp(hook->ifname, dev->name, hook->ifnamelen); switch (event) { case NETDEV_UNREGISTER: /* NOP if not found or new name still matching */ if (!ops || (changename && match)) continue; /* flow_offload_netdev_event() cleans up entries for us. 
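 * Only the hook ops are dropped here; flow entries that still reference the
 * going-away device are flushed by that handler.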
*/ nft_unregister_flowtable_ops(dev_net(dev), flowtable, ops); list_del_rcu(&ops->list); kfree_rcu(ops, rcu); break; case NETDEV_REGISTER: /* NOP if not matching or already registered */ if (!match || (changename && ops)) continue; ops = kzalloc(sizeof(struct nf_hook_ops), GFP_KERNEL_ACCOUNT); if (!ops) return 1; ops->pf = NFPROTO_NETDEV; ops->hooknum = flowtable->hooknum; ops->priority = flowtable->data.priority; ops->priv = &flowtable->data; ops->hook = flowtable->data.type->hook; ops->dev = dev; if (nft_register_flowtable_ops(dev_net(dev), flowtable, ops)) { kfree(ops); return 1; } list_add_tail_rcu(&ops->list, &hook->ops_list); break; } nf_tables_flowtable_device_notify(flowtable, hook, dev, event); break; } return 0; } static int __nf_tables_flowtable_event(unsigned long event, struct net_device *dev, bool changename) { struct nftables_pernet *nft_net = nft_pernet(dev_net(dev)); struct nft_flowtable *flowtable; struct nft_table *table; list_for_each_entry(table, &nft_net->tables, list) { list_for_each_entry(flowtable, &table->flowtables, list) { if (nft_flowtable_event(event, dev, flowtable, changename)) return 1; } } return 0; } static int nf_tables_flowtable_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nftables_pernet *nft_net; int ret = NOTIFY_DONE; struct net *net; if (event != NETDEV_REGISTER && event != NETDEV_UNREGISTER && event != NETDEV_CHANGENAME) return NOTIFY_DONE; net = dev_net(dev); nft_net = nft_pernet(net); mutex_lock(&nft_net->commit_mutex); if (event == NETDEV_CHANGENAME) { if (__nf_tables_flowtable_event(NETDEV_REGISTER, dev, true)) { ret = NOTIFY_BAD; goto out_unlock; } __nf_tables_flowtable_event(NETDEV_UNREGISTER, dev, true); } else if (__nf_tables_flowtable_event(event, dev, false)) { ret = NOTIFY_BAD; } out_unlock: mutex_unlock(&nft_net->commit_mutex); return ret; } static struct notifier_block nf_tables_flowtable_notifier = { .notifier_call = nf_tables_flowtable_event, }; static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) { struct nlmsghdr *nlh = nlmsg_hdr(skb); struct sk_buff *skb2; int err; if (!nlmsg_report(nlh) && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) return; skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb2 == NULL) goto err; err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq); if (err < 0) { kfree_skb(skb2); goto err; } nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL); return; err: nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, -ENOBUFS); } static int nf_tables_getgen(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { struct sk_buff *skb2; int err; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (skb2 == NULL) return -ENOMEM; err = nf_tables_fill_gen_info(skb2, info->net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq); if (err < 0) goto err_fill_gen_info; return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); err_fill_gen_info: kfree_skb(skb2); return err; } static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { [NFT_MSG_NEWTABLE] = { .call = nf_tables_newtable, .type = NFNL_CB_BATCH, .attr_count = NFTA_TABLE_MAX, .policy = nft_table_policy, }, [NFT_MSG_GETTABLE] = { .call = nf_tables_gettable, .type = NFNL_CB_RCU, .attr_count = NFTA_TABLE_MAX, .policy = nft_table_policy, }, [NFT_MSG_DELTABLE] = { .call = nf_tables_deltable, .type = NFNL_CB_BATCH, .attr_count = 
NFTA_TABLE_MAX, .policy = nft_table_policy, }, [NFT_MSG_DESTROYTABLE] = { .call = nf_tables_deltable, .type = NFNL_CB_BATCH, .attr_count = NFTA_TABLE_MAX, .policy = nft_table_policy, }, [NFT_MSG_NEWCHAIN] = { .call = nf_tables_newchain, .type = NFNL_CB_BATCH, .attr_count = NFTA_CHAIN_MAX, .policy = nft_chain_policy, }, [NFT_MSG_GETCHAIN] = { .call = nf_tables_getchain, .type = NFNL_CB_RCU, .attr_count = NFTA_CHAIN_MAX, .policy = nft_chain_policy, }, [NFT_MSG_DELCHAIN] = { .call = nf_tables_delchain, .type = NFNL_CB_BATCH, .attr_count = NFTA_CHAIN_MAX, .policy = nft_chain_policy, }, [NFT_MSG_DESTROYCHAIN] = { .call = nf_tables_delchain, .type = NFNL_CB_BATCH, .attr_count = NFTA_CHAIN_MAX, .policy = nft_chain_policy, }, [NFT_MSG_NEWRULE] = { .call = nf_tables_newrule, .type = NFNL_CB_BATCH, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, [NFT_MSG_GETRULE] = { .call = nf_tables_getrule, .type = NFNL_CB_RCU, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, [NFT_MSG_GETRULE_RESET] = { .call = nf_tables_getrule_reset, .type = NFNL_CB_RCU, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, [NFT_MSG_DELRULE] = { .call = nf_tables_delrule, .type = NFNL_CB_BATCH, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, [NFT_MSG_DESTROYRULE] = { .call = nf_tables_delrule, .type = NFNL_CB_BATCH, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, [NFT_MSG_NEWSET] = { .call = nf_tables_newset, .type = NFNL_CB_BATCH, .attr_count = NFTA_SET_MAX, .policy = nft_set_policy, }, [NFT_MSG_GETSET] = { .call = nf_tables_getset, .type = NFNL_CB_RCU, .attr_count = NFTA_SET_MAX, .policy = nft_set_policy, }, [NFT_MSG_DELSET] = { .call = nf_tables_delset, .type = NFNL_CB_BATCH, .attr_count = NFTA_SET_MAX, .policy = nft_set_policy, }, [NFT_MSG_DESTROYSET] = { .call = nf_tables_delset, .type = NFNL_CB_BATCH, .attr_count = NFTA_SET_MAX, .policy = nft_set_policy, }, [NFT_MSG_NEWSETELEM] = { .call = nf_tables_newsetelem, .type = NFNL_CB_BATCH, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, [NFT_MSG_GETSETELEM] = { .call = nf_tables_getsetelem, .type = NFNL_CB_RCU, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, [NFT_MSG_GETSETELEM_RESET] = { .call = nf_tables_getsetelem_reset, .type = NFNL_CB_RCU, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, [NFT_MSG_DELSETELEM] = { .call = nf_tables_delsetelem, .type = NFNL_CB_BATCH, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, [NFT_MSG_DESTROYSETELEM] = { .call = nf_tables_delsetelem, .type = NFNL_CB_BATCH, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, [NFT_MSG_GETGEN] = { .call = nf_tables_getgen, .type = NFNL_CB_RCU, }, [NFT_MSG_NEWOBJ] = { .call = nf_tables_newobj, .type = NFNL_CB_BATCH, .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, }, [NFT_MSG_GETOBJ] = { .call = nf_tables_getobj, .type = NFNL_CB_RCU, .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, }, [NFT_MSG_DELOBJ] = { .call = nf_tables_delobj, .type = NFNL_CB_BATCH, .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, }, [NFT_MSG_DESTROYOBJ] = { .call = nf_tables_delobj, .type = NFNL_CB_BATCH, .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, }, [NFT_MSG_GETOBJ_RESET] = { .call = nf_tables_getobj_reset, .type = NFNL_CB_RCU, .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, }, [NFT_MSG_NEWFLOWTABLE] = { .call = nf_tables_newflowtable, .type = NFNL_CB_BATCH, .attr_count = NFTA_FLOWTABLE_MAX, .policy = 
nft_flowtable_policy, }, [NFT_MSG_GETFLOWTABLE] = { .call = nf_tables_getflowtable, .type = NFNL_CB_RCU, .attr_count = NFTA_FLOWTABLE_MAX, .policy = nft_flowtable_policy, }, [NFT_MSG_DELFLOWTABLE] = { .call = nf_tables_delflowtable, .type = NFNL_CB_BATCH, .attr_count = NFTA_FLOWTABLE_MAX, .policy = nft_flowtable_policy, }, [NFT_MSG_DESTROYFLOWTABLE] = { .call = nf_tables_delflowtable, .type = NFNL_CB_BATCH, .attr_count = NFTA_FLOWTABLE_MAX, .policy = nft_flowtable_policy, }, }; static int nf_tables_validate(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_table *table; list_for_each_entry(table, &nft_net->tables, list) { switch (table->validate_state) { case NFT_VALIDATE_SKIP: continue; case NFT_VALIDATE_NEED: nft_validate_state_update(table, NFT_VALIDATE_DO); fallthrough; case NFT_VALIDATE_DO: if (nft_table_validate(net, table) < 0) return -EAGAIN; nft_validate_state_update(table, NFT_VALIDATE_SKIP); break; } } return 0; } /* a drop policy has to be deferred until all rules have been activated, * otherwise a large ruleset that contains a drop-policy base chain will * cause all packets to get dropped until the full transaction has been * processed. * * We defer the drop policy until the transaction has been finalized. */ static void nft_chain_commit_drop_policy(struct nft_trans_chain *trans) { struct nft_base_chain *basechain; if (trans->policy != NF_DROP) return; if (!nft_is_base_chain(trans->chain)) return; basechain = nft_base_chain(trans->chain); basechain->policy = NF_DROP; } static void nft_chain_commit_update(struct nft_trans_chain *trans) { struct nft_table *table = trans->nft_trans_binding.nft_trans.table; struct nft_base_chain *basechain; if (trans->name) { rhltable_remove(&table->chains_ht, &trans->chain->rhlhead, nft_chain_ht_params); swap(trans->chain->name, trans->name); rhltable_insert_key(&table->chains_ht, trans->chain->name, &trans->chain->rhlhead, nft_chain_ht_params); } if (!nft_is_base_chain(trans->chain)) return; nft_chain_stats_replace(trans); basechain = nft_base_chain(trans->chain); switch (trans->policy) { case NF_DROP: case NF_ACCEPT: basechain->policy = trans->policy; break; } } static void nft_obj_commit_update(const struct nft_ctx *ctx, struct nft_trans *trans) { struct nft_object *newobj; struct nft_object *obj; obj = nft_trans_obj(trans); newobj = nft_trans_obj_newobj(trans); if (WARN_ON_ONCE(!obj->ops->update)) return; obj->ops->update(obj, newobj); nft_obj_destroy(ctx, newobj); } static void nft_commit_release(struct nft_trans *trans) { struct nft_ctx ctx = { .net = trans->net, }; nft_ctx_update(&ctx, trans); switch (trans->msg_type) { case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: nf_tables_table_destroy(trans->table); break; case NFT_MSG_NEWCHAIN: free_percpu(nft_trans_chain_stats(trans)); kfree(nft_trans_chain_name(trans)); break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) nft_hooks_destroy(&nft_trans_chain_hooks(trans)); else nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: nf_tables_rule_destroy(&ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: nft_set_destroy(&ctx, nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: nft_trans_elems_destroy(&ctx, nft_trans_container_elem(trans)); break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: nft_obj_destroy(&ctx, nft_trans_obj(trans)); break; case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: if 
(nft_trans_flowtable_update(trans)) nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); else nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; } if (trans->put_net) put_net(trans->net); kfree(trans); } static void nf_tables_trans_destroy_work(struct work_struct *w) { struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work); struct nft_trans *trans, *next; LIST_HEAD(head); spin_lock(&nf_tables_destroy_list_lock); list_splice_init(&nft_net->destroy_list, &head); spin_unlock(&nf_tables_destroy_list_lock); if (list_empty(&head)) return; synchronize_rcu(); list_for_each_entry_safe(trans, next, &head, list) { nft_trans_list_del(trans); nft_commit_release(trans); } } void nf_tables_trans_destroy_flush_work(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); flush_work(&nft_net->destroy_work); } EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); static bool nft_expr_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { return false; } static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) { const struct nft_expr *expr, *last; struct nft_regs_track track = {}; unsigned int size, data_size; void *data, *data_boundary; struct nft_rule_dp *prule; struct nft_rule *rule; /* already handled or inactive chain? */ if (chain->blob_next || !nft_is_active_next(net, chain)) return 0; data_size = 0; list_for_each_entry(rule, &chain->rules, list) { if (nft_is_active_next(net, rule)) { data_size += sizeof(*prule) + rule->dlen; if (data_size > INT_MAX) return -ENOMEM; } } chain->blob_next = nf_tables_chain_alloc_rules(chain, data_size); if (!chain->blob_next) return -ENOMEM; data = (void *)chain->blob_next->data; data_boundary = data + data_size; size = 0; list_for_each_entry(rule, &chain->rules, list) { if (!nft_is_active_next(net, rule)) continue; prule = (struct nft_rule_dp *)data; data += offsetof(struct nft_rule_dp, data); if (WARN_ON_ONCE(data > data_boundary)) return -ENOMEM; size = 0; track.last = nft_expr_last(rule); nft_rule_for_each_expr(expr, last, rule) { track.cur = expr; if (nft_expr_reduce(&track, expr)) { expr = track.cur; continue; } if (WARN_ON_ONCE(data + size + expr->ops->size > data_boundary)) return -ENOMEM; memcpy(data + size, expr, expr->ops->size); size += expr->ops->size; } if (WARN_ON_ONCE(size >= 1 << 12)) return -ENOMEM; prule->handle = rule->handle; prule->dlen = size; prule->is_last = 0; data += size; size = 0; chain->blob_next->size += (unsigned long)(data - (void *)prule); } if (WARN_ON_ONCE(data > data_boundary)) return -ENOMEM; prule = (struct nft_rule_dp *)data; nft_last_rule(chain, prule); return 0; } static void nf_tables_commit_chain_prepare_cancel(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { struct nft_chain *chain = nft_trans_rule_chain(trans); kvfree(chain->blob_next); chain->blob_next = NULL; } } } static void __nf_tables_commit_chain_free_rules(struct rcu_head *h) { struct nft_rule_dp_last *l = container_of(h, struct nft_rule_dp_last, h); kvfree(l->blob); } static void nf_tables_commit_chain_free_rules_old(struct nft_rule_blob *blob) { struct nft_rule_dp_last *last; /* last rule trailer is after end marker */ last = (void *)blob + sizeof(*blob) + blob->size; last->blob = blob; call_rcu(&last->h, __nf_tables_commit_chain_free_rules); } static void 
nf_tables_commit_chain(struct net *net, struct nft_chain *chain) { struct nft_rule_blob *g0, *g1; bool next_genbit; next_genbit = nft_gencursor_next(net); g0 = rcu_dereference_protected(chain->blob_gen_0, lockdep_commit_lock_is_held(net)); g1 = rcu_dereference_protected(chain->blob_gen_1, lockdep_commit_lock_is_held(net)); /* No changes to this chain? */ if (chain->blob_next == NULL) { /* chain had no change in last or next generation */ if (g0 == g1) return; /* * chain had no change in this generation; make sure next * one uses same rules as current generation. */ if (next_genbit) { rcu_assign_pointer(chain->blob_gen_1, g0); nf_tables_commit_chain_free_rules_old(g1); } else { rcu_assign_pointer(chain->blob_gen_0, g1); nf_tables_commit_chain_free_rules_old(g0); } return; } if (next_genbit) rcu_assign_pointer(chain->blob_gen_1, chain->blob_next); else rcu_assign_pointer(chain->blob_gen_0, chain->blob_next); chain->blob_next = NULL; if (g0 == g1) return; if (next_genbit) nf_tables_commit_chain_free_rules_old(g1); else nf_tables_commit_chain_free_rules_old(g0); } static void nft_obj_del(struct nft_object *obj) { rhltable_remove(&nft_objname_ht, &obj->rhlhead, nft_objname_ht_params); list_del_rcu(&obj->list); } void nft_chain_del(struct nft_chain *chain) { struct nft_table *table = chain->table; WARN_ON_ONCE(rhltable_remove(&table->chains_ht, &chain->rhlhead, nft_chain_ht_params)); list_del_rcu(&chain->list); } static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx, struct nft_trans_gc *trans) { struct nft_elem_priv **priv = trans->priv; unsigned int i; for (i = 0; i < trans->count; i++) { nft_setelem_data_deactivate(ctx->net, trans->set, priv[i]); nft_setelem_remove(ctx->net, trans->set, priv[i]); } } void nft_trans_gc_destroy(struct nft_trans_gc *trans) { nft_set_put(trans->set); put_net(trans->net); kfree(trans); } static void nft_trans_gc_trans_free(struct rcu_head *rcu) { struct nft_elem_priv *elem_priv; struct nft_trans_gc *trans; struct nft_ctx ctx = {}; unsigned int i; trans = container_of(rcu, struct nft_trans_gc, rcu); ctx.net = read_pnet(&trans->set->net); for (i = 0; i < trans->count; i++) { elem_priv = trans->priv[i]; if (!nft_setelem_is_catchall(trans->set, elem_priv)) atomic_dec(&trans->set->nelems); nf_tables_set_elem_destroy(&ctx, trans->set, elem_priv); } nft_trans_gc_destroy(trans); } static bool nft_trans_gc_work_done(struct nft_trans_gc *trans) { struct nftables_pernet *nft_net; struct nft_ctx ctx = {}; nft_net = nft_pernet(trans->net); mutex_lock(&nft_net->commit_mutex); /* Check for race with transaction, otherwise this batch refers to * stale objects that might not be there anymore. Skip transaction if * set has been destroyed from control plane transaction in case gc * worker loses race. 
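 * The gc sequence is bumped by nft_gc_seq_begin()/nft_gc_seq_end() around
 * each commit, so a mismatch with trans->seq means a transaction ran after
 * this batch was collected and the batch must be discarded.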
*/ if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) { mutex_unlock(&nft_net->commit_mutex); return false; } ctx.net = trans->net; ctx.table = trans->set->table; nft_trans_gc_setelem_remove(&ctx, trans); mutex_unlock(&nft_net->commit_mutex); return true; } static void nft_trans_gc_work(struct work_struct *work) { struct nft_trans_gc *trans, *next; LIST_HEAD(trans_gc_list); spin_lock(&nf_tables_gc_list_lock); list_splice_init(&nf_tables_gc_list, &trans_gc_list); spin_unlock(&nf_tables_gc_list_lock); list_for_each_entry_safe(trans, next, &trans_gc_list, list) { list_del(&trans->list); if (!nft_trans_gc_work_done(trans)) { nft_trans_gc_destroy(trans); continue; } call_rcu(&trans->rcu, nft_trans_gc_trans_free); } } struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, unsigned int gc_seq, gfp_t gfp) { struct net *net = read_pnet(&set->net); struct nft_trans_gc *trans; trans = kzalloc(sizeof(*trans), gfp); if (!trans) return NULL; trans->net = maybe_get_net(net); if (!trans->net) { kfree(trans); return NULL; } refcount_inc(&set->refs); trans->set = set; trans->seq = gc_seq; return trans; } void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv) { trans->priv[trans->count++] = priv; } static void nft_trans_gc_queue_work(struct nft_trans_gc *trans) { spin_lock(&nf_tables_gc_list_lock); list_add_tail(&trans->list, &nf_tables_gc_list); spin_unlock(&nf_tables_gc_list_lock); schedule_work(&trans_gc_work); } static int nft_trans_gc_space(struct nft_trans_gc *trans) { return NFT_TRANS_GC_BATCHCOUNT - trans->count; } struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, unsigned int gc_seq, gfp_t gfp) { struct nft_set *set; if (nft_trans_gc_space(gc)) return gc; set = gc->set; nft_trans_gc_queue_work(gc); return nft_trans_gc_alloc(set, gc_seq, gfp); } void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) { if (trans->count == 0) { nft_trans_gc_destroy(trans); return; } nft_trans_gc_queue_work(trans); } struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) { struct nft_set *set; if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net))) return NULL; if (nft_trans_gc_space(gc)) return gc; set = gc->set; call_rcu(&gc->rcu, nft_trans_gc_trans_free); return nft_trans_gc_alloc(set, 0, gfp); } void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) { WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net)); if (trans->count == 0) { nft_trans_gc_destroy(trans); return; } call_rcu(&trans->rcu, nft_trans_gc_trans_free); } struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, unsigned int gc_seq) { struct nft_set_elem_catchall *catchall; const struct nft_set *set = gc->set; struct nft_set_ext *ext; list_for_each_entry_rcu(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_expired(ext)) continue; if (nft_set_elem_is_dead(ext)) goto dead_elem; nft_set_elem_dead(ext); dead_elem: gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); if (!gc) return NULL; nft_trans_gc_elem_add(gc, catchall->elem); } return gc; } struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc) { struct nft_set_elem_catchall *catchall, *next; u64 tstamp = nft_net_tstamp(gc->net); const struct nft_set *set = gc->set; struct nft_elem_priv *elem_priv; struct nft_set_ext *ext; WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)); list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (!__nft_set_elem_expired(ext, 
tstamp)) continue; gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); if (!gc) return NULL; elem_priv = catchall->elem; nft_setelem_data_deactivate(gc->net, gc->set, elem_priv); nft_setelem_catchall_destroy(catchall); nft_trans_gc_elem_add(gc, elem_priv); } return gc; } static void nf_tables_module_autoload_cleanup(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_module_request *req, *next; WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); list_for_each_entry_safe(req, next, &nft_net->module_list, list) { WARN_ON_ONCE(!req->done); list_del(&req->list); kfree(req); } } static void nf_tables_commit_release(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans; /* all side effects have to be made visible. * For example, if a chain named 'foo' has been deleted, a * new transaction must not find it anymore. * * Memory reclaim happens asynchronously from work queue * to prevent expensive synchronize_rcu() in commit phase. */ if (list_empty(&nft_net->commit_list)) { nf_tables_module_autoload_cleanup(net); mutex_unlock(&nft_net->commit_mutex); return; } trans = list_last_entry(&nft_net->commit_list, struct nft_trans, list); get_net(trans->net); WARN_ON_ONCE(trans->put_net); trans->put_net = true; spin_lock(&nf_tables_destroy_list_lock); list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list); spin_unlock(&nf_tables_destroy_list_lock); nf_tables_module_autoload_cleanup(net); schedule_work(&nft_net->destroy_work); mutex_unlock(&nft_net->commit_mutex); } static void nft_commit_notify(struct net *net, u32 portid) { struct nftables_pernet *nft_net = nft_pernet(net); struct sk_buff *batch_skb = NULL, *nskb, *skb; unsigned char *data; int len; list_for_each_entry_safe(skb, nskb, &nft_net->notify_list, list) { if (!batch_skb) { new_batch: batch_skb = skb; len = NLMSG_GOODSIZE - skb->len; list_del(&skb->list); continue; } len -= skb->len; if (len > 0 && NFT_CB(skb).report == NFT_CB(batch_skb).report) { data = skb_put(batch_skb, skb->len); memcpy(data, skb->data, skb->len); list_del(&skb->list); kfree_skb(skb); continue; } nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES, NFT_CB(batch_skb).report, GFP_KERNEL); goto new_batch; } if (batch_skb) { nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES, NFT_CB(batch_skb).report, GFP_KERNEL); } WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); } static int nf_tables_commit_audit_alloc(struct list_head *adl, struct nft_table *table) { struct nft_audit_data *adp; list_for_each_entry(adp, adl, list) { if (adp->table == table) return 0; } adp = kzalloc(sizeof(*adp), GFP_KERNEL); if (!adp) return -ENOMEM; adp->table = table; list_add(&adp->list, adl); return 0; } static void nf_tables_commit_audit_free(struct list_head *adl) { struct nft_audit_data *adp, *adn; list_for_each_entry_safe(adp, adn, adl, list) { list_del(&adp->list); kfree(adp); } } /* nft audit emits the number of elements that get added/removed/updated, * so NEW/DELSETELEM needs to increment based on the total elem count. 
*/ static unsigned int nf_tables_commit_audit_entrycount(const struct nft_trans *trans) { switch (trans->msg_type) { case NFT_MSG_NEWSETELEM: case NFT_MSG_DELSETELEM: return nft_trans_container_elem(trans)->nelems; } return 1; } static void nf_tables_commit_audit_collect(struct list_head *adl, const struct nft_trans *trans, u32 op) { const struct nft_table *table = trans->table; struct nft_audit_data *adp; list_for_each_entry(adp, adl, list) { if (adp->table == table) goto found; } WARN_ONCE(1, "table=%s not expected in commit list", table->name); return; found: adp->entries += nf_tables_commit_audit_entrycount(trans); if (!adp->op || adp->op > op) adp->op = op; } #define AUNFTABLENAMELEN (NFT_TABLE_MAXNAMELEN + 22) static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation) { struct nft_audit_data *adp, *adn; char aubuf[AUNFTABLENAMELEN]; list_for_each_entry_safe(adp, adn, adl, list) { snprintf(aubuf, AUNFTABLENAMELEN, "%s:%u", adp->table->name, generation); audit_log_nfcfg(aubuf, adp->table->family, adp->entries, nft2audit_op[adp->op], GFP_KERNEL); list_del(&adp->list); kfree(adp); } } static void nft_set_commit_update(struct list_head *set_update_list) { struct nft_set *set, *next; list_for_each_entry_safe(set, next, set_update_list, pending_update) { list_del_init(&set->pending_update); if (!set->ops->commit || set->dead) continue; set->ops->commit(set); } } static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net) { unsigned int gc_seq; /* Bump gc counter, it becomes odd, this is the busy mark. */ gc_seq = READ_ONCE(nft_net->gc_seq); WRITE_ONCE(nft_net->gc_seq, ++gc_seq); return gc_seq; } static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) { WRITE_ONCE(nft_net->gc_seq, ++gc_seq); } static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); const struct nlmsghdr *nlh = nlmsg_hdr(skb); struct nft_trans_binding *trans_binding; struct nft_trans *trans, *next; unsigned int base_seq, gc_seq; LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; struct nft_ctx ctx; LIST_HEAD(adl); int err; if (list_empty(&nft_net->commit_list)) { mutex_unlock(&nft_net->commit_mutex); return 0; } nft_ctx_init(&ctx, net, skb, nlh, NFPROTO_UNSPEC, NULL, NULL, NULL); list_for_each_entry(trans_binding, &nft_net->binding_list, binding_list) { trans = &trans_binding->nft_trans; switch (trans->msg_type) { case NFT_MSG_NEWSET: if (!nft_trans_set_update(trans) && nft_set_is_anonymous(nft_trans_set(trans)) && !nft_trans_set_bound(trans)) { pr_warn_once("nftables ruleset with unbound set\n"); return -EINVAL; } break; case NFT_MSG_NEWCHAIN: if (!nft_trans_chain_update(trans) && nft_chain_binding(nft_trans_chain(trans)) && !nft_trans_chain_bound(trans)) { pr_warn_once("nftables ruleset with unbound chain\n"); return -EINVAL; } break; default: WARN_ONCE(1, "Unhandled bind type %d", trans->msg_type); break; } } /* 0. Validate ruleset, otherwise roll back for error reporting. */ if (nf_tables_validate(net) < 0) { nft_net->validate_state = NFT_VALIDATE_DO; return -EAGAIN; } err = nft_flow_rule_offload_commit(net); if (err < 0) return err; /* 1. 
Allocate space for next generation rules_gen_X[] */ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { struct nft_table *table = trans->table; int ret; ret = nf_tables_commit_audit_alloc(&adl, table); if (ret) { nf_tables_commit_chain_prepare_cancel(net); nf_tables_commit_audit_free(&adl); return ret; } if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { chain = nft_trans_rule_chain(trans); ret = nf_tables_commit_chain_prepare(net, chain); if (ret < 0) { nf_tables_commit_chain_prepare_cancel(net); nf_tables_commit_audit_free(&adl); return ret; } } } /* step 2. Make rules_gen_X visible to packet path */ list_for_each_entry(table, &nft_net->tables, list) { list_for_each_entry(chain, &table->chains, list) nf_tables_commit_chain(net, chain); } /* * Bump generation counter, invalidate any dump in progress. * Cannot fail after this point. */ base_seq = READ_ONCE(nft_net->base_seq); while (++base_seq == 0) ; WRITE_ONCE(nft_net->base_seq, base_seq); gc_seq = nft_gc_seq_begin(nft_net); /* step 3. Start new generation, rules_gen_X now in use. */ net->nft.gencursor = nft_gencursor_next(net); list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { struct nft_table *table = trans->table; nft_ctx_update(&ctx, trans); nf_tables_commit_audit_collect(&adl, trans, trans->msg_type); switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { if (!(table->flags & __NFT_TABLE_F_UPDATE)) { nft_trans_destroy(trans); break; } if (table->flags & NFT_TABLE_F_DORMANT) nf_tables_table_disable(net, table); table->flags &= ~__NFT_TABLE_F_UPDATE; } else { nft_clear(net, table); } nf_tables_table_notify(&ctx, NFT_MSG_NEWTABLE); nft_trans_destroy(trans); break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: list_del_rcu(&table->list); nf_tables_table_notify(&ctx, trans->msg_type); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { nft_chain_commit_update(nft_trans_container_chain(trans)); nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, &nft_trans_chain_hooks(trans)); list_splice(&nft_trans_chain_hooks(trans), &nft_trans_basechain(trans)->hook_list); /* trans destroyed after rcu grace period */ } else { nft_chain_commit_drop_policy(nft_trans_container_chain(trans)); nft_clear(net, nft_trans_chain(trans)); nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL); nft_trans_destroy(trans); } break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) { nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, &nft_trans_chain_hooks(trans)); if (!(table->flags & NFT_TABLE_F_DORMANT)) { nft_netdev_unregister_hooks(net, &nft_trans_chain_hooks(trans), true); } } else { nft_chain_del(nft_trans_chain(trans)); nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, NULL); nf_tables_unregister_hook(ctx.net, ctx.table, nft_trans_chain(trans)); } break; case NFT_MSG_NEWRULE: nft_clear(net, nft_trans_rule(trans)); nf_tables_rule_notify(&ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: list_del_rcu(&nft_trans_rule(trans)->list); nf_tables_rule_notify(&ctx, nft_trans_rule(trans), trans->msg_type); nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans), NFT_TRANS_COMMIT); if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: 
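/* New or updated set: unlink it from the per-transaction newset list, then either apply the updated timeout/gc/size parameters or make the set visible in the next generation. */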
list_del(&nft_trans_container_set(trans)->list_trans_newset); if (nft_trans_set_update(trans)) { struct nft_set *set = nft_trans_set(trans); WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans)); WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans)); if (nft_trans_set_size(trans)) WRITE_ONCE(set->size, nft_trans_set_size(trans)); } else { nft_clear(net, nft_trans_set(trans)); /* This avoids hitting -EBUSY when deleting the table * from the transaction. */ if (nft_set_is_anonymous(nft_trans_set(trans)) && !list_empty(&nft_trans_set(trans)->bindings)) nft_use_dec(&table->use); } nf_tables_set_notify(&ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); nft_trans_destroy(trans); break; case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); nf_tables_set_notify(&ctx, nft_trans_set(trans), trans->msg_type, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: te = nft_trans_container_elem(trans); nft_trans_elems_add(&ctx, te); if (te->set->ops->commit && list_empty(&te->set->pending_update)) { list_add_tail(&te->set->pending_update, &set_update_list); } nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: te = nft_trans_container_elem(trans); nft_trans_elems_remove(&ctx, te); if (te->set->ops->commit && list_empty(&te->set->pending_update)) { list_add_tail(&te->set->pending_update, &set_update_list); } break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { nft_obj_commit_update(&ctx, trans); nf_tables_obj_notify(&ctx, nft_trans_obj(trans), NFT_MSG_NEWOBJ); } else { nft_clear(net, nft_trans_obj(trans)); nf_tables_obj_notify(&ctx, nft_trans_obj(trans), NFT_MSG_NEWOBJ); nft_trans_destroy(trans); } break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: nft_obj_del(nft_trans_obj(trans)); nf_tables_obj_notify(&ctx, nft_trans_obj(trans), trans->msg_type); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) { nft_trans_flowtable(trans)->data.flags = nft_trans_flowtable_flags(trans); nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), NFT_MSG_NEWFLOWTABLE); list_splice(&nft_trans_flowtable_hooks(trans), &nft_trans_flowtable(trans)->hook_list); } else { nft_clear(net, nft_trans_flowtable(trans)); nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, NFT_MSG_NEWFLOWTABLE); } nft_trans_destroy(trans); break; case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) { nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), trans->msg_type); nft_unregister_flowtable_net_hooks(net, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans)); } else { list_del_rcu(&nft_trans_flowtable(trans)->list); nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, trans->msg_type); nft_unregister_flowtable_net_hooks(net, nft_trans_flowtable(trans), &nft_trans_flowtable(trans)->hook_list); } break; } } nft_set_commit_update(&set_update_list); nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_audit_log(&adl, nft_net->base_seq); nft_gc_seq_end(nft_net, gc_seq); nft_net->validate_state = NFT_VALIDATE_SKIP; nf_tables_commit_release(net); return 0; } static void nf_tables_module_autoload(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_module_request *req, *next; LIST_HEAD(module_list); list_splice_init(&nft_net->module_list, &module_list); 
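/* Drop the commit mutex while request_module() sleeps; it is re-acquired before the requests are spliced back onto the pernet list. */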
mutex_unlock(&nft_net->commit_mutex); list_for_each_entry_safe(req, next, &module_list, list) { request_module("%s", req->module); req->done = true; } mutex_lock(&nft_net->commit_mutex); list_splice(&module_list, &nft_net->module_list); } static void nf_tables_abort_release(struct nft_trans *trans) { struct nft_ctx ctx = { }; nft_ctx_update(&ctx, trans); switch (trans->msg_type) { case NFT_MSG_NEWTABLE: nf_tables_table_destroy(trans->table); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) nft_hooks_destroy(&nft_trans_chain_hooks(trans)); else nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_NEWRULE: nf_tables_rule_destroy(&ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: nft_set_destroy(&ctx, nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_trans_set_elem_destroy(&ctx, nft_trans_container_elem(trans)); break; case NFT_MSG_NEWOBJ: nft_obj_destroy(&ctx, nft_trans_obj(trans)); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); else nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; } kfree(trans); } static void nft_set_abort_update(struct list_head *set_update_list) { struct nft_set *set, *next; list_for_each_entry_safe(set, next, set_update_list, pending_update) { list_del_init(&set->pending_update); if (!set->ops->abort) continue; set->ops->abort(set); } } static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_ctx ctx = { .net = net, }; int err = 0; if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) err = -EAGAIN; list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { struct nft_table *table = trans->table; nft_ctx_update(&ctx, trans); switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { if (!(table->flags & __NFT_TABLE_F_UPDATE)) { nft_trans_destroy(trans); break; } if (table->flags & __NFT_TABLE_F_WAS_DORMANT) { nf_tables_table_disable(net, table); table->flags |= NFT_TABLE_F_DORMANT; } else if (table->flags & __NFT_TABLE_F_WAS_AWAKEN) { table->flags &= ~NFT_TABLE_F_DORMANT; } if (table->flags & __NFT_TABLE_F_WAS_ORPHAN) { table->flags &= ~NFT_TABLE_F_OWNER; table->nlpid = 0; } table->flags &= ~__NFT_TABLE_F_UPDATE; nft_trans_destroy(trans); } else { list_del_rcu(&table->list); } break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: nft_clear(trans->net, table); nft_trans_destroy(trans); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { if (!(table->flags & NFT_TABLE_F_DORMANT)) { nft_netdev_unregister_hooks(net, &nft_trans_chain_hooks(trans), true); } free_percpu(nft_trans_chain_stats(trans)); kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); } else { if (nft_trans_chain_bound(trans)) { nft_trans_destroy(trans); break; } nft_use_dec_restore(&table->use); nft_chain_del(nft_trans_chain(trans)); nf_tables_unregister_hook(trans->net, table, nft_trans_chain(trans)); } break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) { list_splice(&nft_trans_chain_hooks(trans), &nft_trans_basechain(trans)->hook_list); } else { nft_use_inc_restore(&table->use); nft_clear(trans->net, nft_trans_chain(trans)); } nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: if (nft_trans_rule_bound(trans)) { nft_trans_destroy(trans); break; } 
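/* Unbound new rule being aborted: give the chain its use count back, unlink the rule and deactivate its expressions. */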
nft_use_dec_restore(&nft_trans_rule_chain(trans)->use); list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans), NFT_TRANS_ABORT); if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: nft_use_inc_restore(&nft_trans_rule_chain(trans)->use); nft_clear(trans->net, nft_trans_rule(trans)); nft_rule_expr_activate(&ctx, nft_trans_rule(trans)); if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: list_del(&nft_trans_container_set(trans)->list_trans_newset); if (nft_trans_set_update(trans)) { nft_trans_destroy(trans); break; } nft_use_dec_restore(&table->use); if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; } nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: nft_use_inc_restore(&table->use); nft_clear(trans->net, nft_trans_set(trans)); if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) nft_map_activate(&ctx, nft_trans_set(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: if (nft_trans_elem_set_bound(trans)) { nft_trans_destroy(trans); break; } te = nft_trans_container_elem(trans); if (!nft_trans_elems_new_abort(&ctx, te)) { nft_trans_destroy(trans); break; } if (te->set->ops->abort && list_empty(&te->set->pending_update)) { list_add_tail(&te->set->pending_update, &set_update_list); } break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: te = nft_trans_container_elem(trans); nft_trans_elems_destroy_abort(&ctx, te); if (te->set->ops->abort && list_empty(&te->set->pending_update)) { list_add_tail(&te->set->pending_update, &set_update_list); } nft_trans_destroy(trans); break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { nft_obj_destroy(&ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { nft_use_dec_restore(&table->use); nft_obj_del(nft_trans_obj(trans)); } break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: nft_use_inc_restore(&table->use); nft_clear(trans->net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) { nft_unregister_flowtable_net_hooks(net, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans)); } else { nft_use_dec_restore(&table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, nft_trans_flowtable(trans), &nft_trans_flowtable(trans)->hook_list); } break; case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) { list_splice(&nft_trans_flowtable_hooks(trans), &nft_trans_flowtable(trans)->hook_list); } else { nft_use_inc_restore(&table->use); nft_clear(trans->net, nft_trans_flowtable(trans)); } nft_trans_destroy(trans); break; } } WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list)); nft_set_abort_update(&set_update_list); synchronize_rcu(); list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { nft_trans_list_del(trans); nf_tables_abort_release(trans); } return err; } static int nf_tables_abort(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action) { struct nftables_pernet *nft_net = nft_pernet(net); unsigned int gc_seq; int ret; gc_seq = nft_gc_seq_begin(nft_net); ret = __nf_tables_abort(net, action); nft_gc_seq_end(nft_net, gc_seq); 
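/* __nf_tables_abort() must have released every pending transaction by now. */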
WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); /* module autoload needs to happen after GC sequence update because it * temporarily releases and grabs mutex again. */ if (action == NFNL_ABORT_AUTOLOAD) nf_tables_module_autoload(net); else nf_tables_module_autoload_cleanup(net); mutex_unlock(&nft_net->commit_mutex); return ret; } static bool nf_tables_valid_genid(struct net *net, u32 genid) { struct nftables_pernet *nft_net = nft_pernet(net); bool genid_ok; mutex_lock(&nft_net->commit_mutex); nft_net->tstamp = get_jiffies_64(); genid_ok = genid == 0 || nft_net->base_seq == genid; if (!genid_ok) mutex_unlock(&nft_net->commit_mutex); /* else, commit mutex has to be released by commit or abort function */ return genid_ok; } static const struct nfnetlink_subsystem nf_tables_subsys = { .name = "nf_tables", .subsys_id = NFNL_SUBSYS_NFTABLES, .cb_count = NFT_MSG_MAX, .cb = nf_tables_cb, .commit = nf_tables_commit, .abort = nf_tables_abort, .valid_genid = nf_tables_valid_genid, .owner = THIS_MODULE, }; int nft_chain_validate_dependency(const struct nft_chain *chain, enum nft_chain_types type) { const struct nft_base_chain *basechain; if (nft_is_base_chain(chain)) { basechain = nft_base_chain(chain); if (basechain->type->type != type) return -EOPNOTSUPP; } return 0; } EXPORT_SYMBOL_GPL(nft_chain_validate_dependency); int nft_chain_validate_hooks(const struct nft_chain *chain, unsigned int hook_flags) { struct nft_base_chain *basechain; if (nft_is_base_chain(chain)) { basechain = nft_base_chain(chain); if ((1 << basechain->ops.hooknum) & hook_flags) return 0; return -EOPNOTSUPP; } return 0; } EXPORT_SYMBOL_GPL(nft_chain_validate_hooks); /** * nft_parse_u32_check - fetch u32 attribute and check for maximum value * * @attr: netlink attribute to fetch value from * @max: maximum value to be stored in dest * @dest: pointer to the variable * * Parse, check and store a given u32 netlink attribute into variable. * This function returns -ERANGE if the value goes over maximum value. * Otherwise a 0 is returned and the attribute value is stored in the * destination variable. */ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) { u32 val; val = ntohl(nla_get_be32(attr)); if (val > max) return -ERANGE; *dest = val; return 0; } EXPORT_SYMBOL_GPL(nft_parse_u32_check); static int nft_parse_register(const struct nlattr *attr, u32 *preg) { unsigned int reg; reg = ntohl(nla_get_be32(attr)); switch (reg) { case NFT_REG_VERDICT...NFT_REG_4: *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE; break; case NFT_REG32_00...NFT_REG32_15: *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; break; default: return -ERANGE; } return 0; } /** * nft_dump_register - dump a register value to a netlink attribute * * @skb: socket buffer * @attr: attribute number * @reg: register number * * Construct a netlink attribute containing the register number. For * compatibility reasons, register numbers being a multiple of 4 are * translated to the corresponding 128 bit register numbers. 
*/ int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg) { if (reg % (NFT_REG_SIZE / NFT_REG32_SIZE) == 0) reg = reg / (NFT_REG_SIZE / NFT_REG32_SIZE); else reg = reg - NFT_REG_SIZE / NFT_REG32_SIZE + NFT_REG32_00; return nla_put_be32(skb, attr, htonl(reg)); } EXPORT_SYMBOL_GPL(nft_dump_register); static int nft_validate_register_load(enum nft_registers reg, unsigned int len) { if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) return -EINVAL; if (len == 0) return -EINVAL; if (reg * NFT_REG32_SIZE + len > sizeof_field(struct nft_regs, data)) return -ERANGE; return 0; } int nft_parse_register_load(const struct nft_ctx *ctx, const struct nlattr *attr, u8 *sreg, u32 len) { int err, invalid_reg; u32 reg, next_register; err = nft_parse_register(attr, &reg); if (err < 0) return err; err = nft_validate_register_load(reg, len); if (err < 0) return err; next_register = DIV_ROUND_UP(len, NFT_REG32_SIZE) + reg; /* Can't happen: nft_validate_register_load() should have failed */ if (WARN_ON_ONCE(next_register > NFT_REG32_NUM)) return -EINVAL; /* find first register that did not see an earlier store. */ invalid_reg = find_next_zero_bit(ctx->reg_inited, NFT_REG32_NUM, reg); /* invalid register within the range that we're loading from? */ if (invalid_reg < next_register) return -ENODATA; *sreg = reg; return 0; } EXPORT_SYMBOL_GPL(nft_parse_register_load); static void nft_saw_register_store(const struct nft_ctx *__ctx, int reg, unsigned int len) { unsigned int registers = DIV_ROUND_UP(len, NFT_REG32_SIZE); struct nft_ctx *ctx = (struct nft_ctx *)__ctx; if (WARN_ON_ONCE(len == 0 || reg < 0)) return; bitmap_set(ctx->reg_inited, reg, registers); } static int nft_validate_register_store(const struct nft_ctx *ctx, enum nft_registers reg, const struct nft_data *data, enum nft_data_types type, unsigned int len) { int err; switch (reg) { case NFT_REG_VERDICT: if (type != NFT_DATA_VERDICT) return -EINVAL; if (data != NULL && (data->verdict.code == NFT_GOTO || data->verdict.code == NFT_JUMP)) { err = nft_chain_validate(ctx, data->verdict.chain); if (err < 0) return err; } break; default: if (type != NFT_DATA_VALUE) return -EINVAL; if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) return -EINVAL; if (len == 0) return -EINVAL; if (reg * NFT_REG32_SIZE + len > sizeof_field(struct nft_regs, data)) return -ERANGE; break; } nft_saw_register_store(ctx, reg, len); return 0; } int nft_parse_register_store(const struct nft_ctx *ctx, const struct nlattr *attr, u8 *dreg, const struct nft_data *data, enum nft_data_types type, unsigned int len) { int err; u32 reg; err = nft_parse_register(attr, &reg); if (err < 0) return err; err = nft_validate_register_store(ctx, reg, data, type, len); if (err < 0) return err; *dreg = reg; return 0; } EXPORT_SYMBOL_GPL(nft_parse_register_store); static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { [NFTA_VERDICT_CODE] = { .type = NLA_U32 }, [NFTA_VERDICT_CHAIN] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_VERDICT_CHAIN_ID] = { .type = NLA_U32 }, }; static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_data_desc *desc, const struct nlattr *nla) { u8 genmask = nft_genmask_next(ctx->net); struct nlattr *tb[NFTA_VERDICT_MAX + 1]; struct nft_chain *chain; int err; err = nla_parse_nested_deprecated(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy, NULL); if (err < 0) return err; if (!tb[NFTA_VERDICT_CODE]) return -EINVAL; /* zero padding hole for memcmp */ memset(data, 0, sizeof(*data));
data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); switch (data->verdict.code) { case NF_ACCEPT: case NF_DROP: case NF_QUEUE: break; case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: break; case NFT_JUMP: case NFT_GOTO: if (tb[NFTA_VERDICT_CHAIN]) { chain = nft_chain_lookup(ctx->net, ctx->table, tb[NFTA_VERDICT_CHAIN], genmask); } else if (tb[NFTA_VERDICT_CHAIN_ID]) { chain = nft_chain_lookup_byid(ctx->net, ctx->table, tb[NFTA_VERDICT_CHAIN_ID], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); } else { return -EINVAL; } if (IS_ERR(chain)) return PTR_ERR(chain); if (nft_is_base_chain(chain)) return -EOPNOTSUPP; if (nft_chain_is_bound(chain)) return -EINVAL; if (desc->flags & NFT_DATA_DESC_SETELEM && chain->flags & NFT_CHAIN_BINDING) return -EINVAL; if (!nft_use_inc(&chain->use)) return -EMFILE; data->verdict.chain = chain; break; default: return -EINVAL; } desc->len = sizeof(data->verdict); return 0; } static void nft_verdict_uninit(const struct nft_data *data) { struct nft_chain *chain; switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: chain = data->verdict.chain; nft_use_dec(&chain->use); break; } } int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, type); if (!nest) goto nla_put_failure; if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code))) goto nla_put_failure; switch (v->code) { case NFT_JUMP: case NFT_GOTO: if (nla_put_string(skb, NFTA_VERDICT_CHAIN, v->chain->name)) goto nla_put_failure; } nla_nest_end(skb, nest); return 0; nla_put_failure: return -1; } static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_data_desc *desc, const struct nlattr *nla) { unsigned int len; len = nla_len(nla); if (len == 0) return -EINVAL; if (len > desc->size) return -EOVERFLOW; if (desc->len) { if (len != desc->len) return -EINVAL; } else { desc->len = len; } nla_memcpy(data->data, nla, len); return 0; } static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data, unsigned int len) { return nla_put(skb, NFTA_DATA_VALUE, len, data->data); } static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { [NFTA_DATA_VALUE] = { .type = NLA_BINARY }, [NFTA_DATA_VERDICT] = { .type = NLA_NESTED }, }; /** * nft_data_init - parse nf_tables data netlink attributes * * @ctx: context of the expression using the data * @data: destination struct nft_data * @desc: data description * @nla: netlink attribute containing data * * Parse the netlink data attributes and initialize a struct nft_data. * The type and length of data are returned in the data description. * * The caller can indicate that it only wants to accept data of type * NFT_DATA_VALUE by passing NULL for the ctx argument. 
*/ int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_data_desc *desc, const struct nlattr *nla) { struct nlattr *tb[NFTA_DATA_MAX + 1]; int err; if (WARN_ON_ONCE(!desc->size)) return -EINVAL; err = nla_parse_nested_deprecated(tb, NFTA_DATA_MAX, nla, nft_data_policy, NULL); if (err < 0) return err; if (tb[NFTA_DATA_VALUE]) { if (desc->type != NFT_DATA_VALUE) return -EINVAL; err = nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]); } else if (tb[NFTA_DATA_VERDICT] && ctx != NULL) { if (desc->type != NFT_DATA_VERDICT) return -EINVAL; err = nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]); } else { err = -EINVAL; } return err; } EXPORT_SYMBOL_GPL(nft_data_init); /** * nft_data_release - release a nft_data item * * @data: struct nft_data to release * @type: type of data * * Release a nft_data item. NFT_DATA_VALUE types can be silently discarded, * all others need to be released by calling this function. */ void nft_data_release(const struct nft_data *data, enum nft_data_types type) { if (type < NFT_DATA_VERDICT) return; switch (type) { case NFT_DATA_VERDICT: return nft_verdict_uninit(data); default: WARN_ON(1); } } EXPORT_SYMBOL_GPL(nft_data_release); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len) { struct nlattr *nest; int err; nest = nla_nest_start_noflag(skb, attr); if (nest == NULL) return -1; switch (type) { case NFT_DATA_VALUE: err = nft_value_dump(skb, data, len); break; case NFT_DATA_VERDICT: err = nft_verdict_dump(skb, NFTA_DATA_VERDICT, &data->verdict); break; default: err = -EINVAL; WARN_ON(1); } nla_nest_end(skb, nest); return err; } EXPORT_SYMBOL_GPL(nft_data_dump); static void __nft_release_hook(struct net *net, struct nft_table *table) { struct nft_flowtable *flowtable; struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) __nf_tables_unregister_hook(net, table, chain, true); list_for_each_entry(flowtable, &table->flowtables, list) __nft_unregister_flowtable_net_hooks(net, flowtable, &flowtable->hook_list, true); } static void __nft_release_hooks(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_table *table; list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table)) continue; __nft_release_hook(net, table); } } static void __nft_release_table(struct net *net, struct nft_table *table) { struct nft_flowtable *flowtable, *nf; struct nft_chain *chain, *nc; struct nft_object *obj, *ne; struct nft_rule *rule, *nr; struct nft_set *set, *ns; struct nft_ctx ctx = { .net = net, .family = NFPROTO_NETDEV, }; ctx.family = table->family; ctx.table = table; list_for_each_entry(chain, &table->chains, list) { if (nft_chain_binding(chain)) continue; ctx.chain = chain; list_for_each_entry_safe(rule, nr, &chain->rules, list) { list_del(&rule->list); nft_use_dec(&chain->use); nf_tables_rule_release(&ctx, rule); } } list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { list_del(&flowtable->list); nft_use_dec(&table->use); nf_tables_flowtable_destroy(flowtable); } list_for_each_entry_safe(set, ns, &table->sets, list) { list_del(&set->list); nft_use_dec(&table->use); if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) nft_map_deactivate(&ctx, set); nft_set_destroy(&ctx, set); } list_for_each_entry_safe(obj, ne, &table->objects, list) { nft_obj_del(obj); nft_use_dec(&table->use); nft_obj_destroy(&ctx, obj); } list_for_each_entry_safe(chain, nc, &table->chains, list) { nft_chain_del(chain); 
nft_use_dec(&table->use); nf_tables_chain_destroy(chain); } nf_tables_table_destroy(table); } static void __nft_release_tables(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_table *table, *nt; list_for_each_entry_safe(table, nt, &nft_net->tables, list) { if (nft_table_has_owner(table)) continue; list_del(&table->list); __nft_release_table(net, table); } } static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { struct nft_table *table, *to_delete[8]; struct nftables_pernet *nft_net; struct netlink_notify *n = ptr; struct net *net = n->net; unsigned int deleted; bool restart = false; unsigned int gc_seq; if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER) return NOTIFY_DONE; nft_net = nft_pernet(net); deleted = 0; mutex_lock(&nft_net->commit_mutex); gc_seq = nft_gc_seq_begin(nft_net); nf_tables_trans_destroy_flush_work(net); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && n->portid == table->nlpid) { if (table->flags & NFT_TABLE_F_PERSIST) { table->flags &= ~NFT_TABLE_F_OWNER; continue; } __nft_release_hook(net, table); list_del_rcu(&table->list); to_delete[deleted++] = table; if (deleted >= ARRAY_SIZE(to_delete)) break; } } if (deleted) { restart = deleted >= ARRAY_SIZE(to_delete); synchronize_rcu(); while (deleted) __nft_release_table(net, to_delete[--deleted]); if (restart) goto again; } nft_gc_seq_end(nft_net, gc_seq); mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } static struct notifier_block nft_nl_notifier = { .notifier_call = nft_rcv_nl_event, }; static int __net_init nf_tables_init_net(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); INIT_LIST_HEAD(&nft_net->destroy_list); INIT_LIST_HEAD(&nft_net->commit_set_list); INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); INIT_LIST_HEAD(&nft_net->notify_list); mutex_init(&nft_net->commit_mutex); nft_net->base_seq = 1; nft_net->gc_seq = 0; nft_net->validate_state = NFT_VALIDATE_SKIP; INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work); return 0; } static void __net_exit nf_tables_pre_exit_net(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); mutex_lock(&nft_net->commit_mutex); __nft_release_hooks(net); mutex_unlock(&nft_net->commit_mutex); } static void __net_exit nf_tables_exit_net(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); unsigned int gc_seq; mutex_lock(&nft_net->commit_mutex); gc_seq = nft_gc_seq_begin(nft_net); WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list)); if (!list_empty(&nft_net->module_list)) nf_tables_module_autoload_cleanup(net); cancel_work_sync(&nft_net->destroy_work); __nft_release_tables(net); nft_gc_seq_end(nft_net, gc_seq); mutex_unlock(&nft_net->commit_mutex); WARN_ON_ONCE(!list_empty(&nft_net->tables)); WARN_ON_ONCE(!list_empty(&nft_net->module_list)); WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); WARN_ON_ONCE(!list_empty(&nft_net->destroy_list)); } static void nf_tables_exit_batch(struct list_head *net_exit_list) { flush_work(&trans_gc_work); } static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, .pre_exit = nf_tables_pre_exit_net, .exit = nf_tables_exit_net, .exit_batch = nf_tables_exit_batch, .id = &nf_tables_net_id, .size = sizeof(struct nftables_pernet), }; static int __init nf_tables_module_init(void) { int 
err; BUILD_BUG_ON(offsetof(struct nft_trans_table, nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_chain, nft_trans_binding.nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_rule, nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_set, nft_trans_binding.nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_elem, nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_obj, nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_flowtable, nft_trans) != 0); err = register_pernet_subsys(&nf_tables_net_ops); if (err < 0) return err; err = nft_chain_filter_init(); if (err < 0) goto err_chain_filter; err = nf_tables_core_module_init(); if (err < 0) goto err_core_module; err = register_netdevice_notifier(&nf_tables_flowtable_notifier); if (err < 0) goto err_netdev_notifier; err = rhltable_init(&nft_objname_ht, &nft_objname_ht_params); if (err < 0) goto err_rht_objname; err = nft_offload_init(); if (err < 0) goto err_offload; err = netlink_register_notifier(&nft_nl_notifier); if (err < 0) goto err_netlink_notifier; /* must be last */ err = nfnetlink_subsys_register(&nf_tables_subsys); if (err < 0) goto err_nfnl_subsys; nft_chain_route_init(); return err; err_nfnl_subsys: netlink_unregister_notifier(&nft_nl_notifier); err_netlink_notifier: nft_offload_exit(); err_offload: rhltable_destroy(&nft_objname_ht); err_rht_objname: unregister_netdevice_notifier(&nf_tables_flowtable_notifier); err_netdev_notifier: nf_tables_core_module_exit(); err_core_module: nft_chain_filter_fini(); err_chain_filter: unregister_pernet_subsys(&nf_tables_net_ops); return err; } static void __exit nf_tables_module_exit(void) { nfnetlink_subsys_unregister(&nf_tables_subsys); netlink_unregister_notifier(&nft_nl_notifier); nft_offload_exit(); unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_gc_work); rcu_barrier(); rhltable_destroy(&nft_objname_ht); nf_tables_core_module_exit(); } module_init(nf_tables_module_init); module_exit(nf_tables_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("Framework for packet filtering and classification"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES); |
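The nft_parse_register()/nft_dump_register() pair above translates between the netlink register numbers (the legacy 128-bit registers NFT_REG_VERDICT..NFT_REG_4 and the 32-bit registers NFT_REG32_00..NFT_REG32_15) and the internal 32-bit word index used by the expression engine. The short standalone sketch below is not part of the kernel sources; it only replays that arithmetic with the UAPI constant values (NFT_REG32_00 = 8, NFT_REG_SIZE = 16, NFT_REG32_SIZE = 4) and omits the range checks the kernel helpers perform.

#include <assert.h>

#define NFT_REG_SIZE	16
#define NFT_REG32_SIZE	4
#define NFT_REG32_00	8

/* netlink register number -> internal 32-bit word index (no validation) */
static unsigned int nl_to_internal(unsigned int reg)
{
	if (reg <= 4)	/* NFT_REG_VERDICT..NFT_REG_4 are 128-bit registers */
		return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
	return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
}

/* internal 32-bit word index -> netlink register number */
static unsigned int internal_to_nl(unsigned int reg)
{
	if (reg % (NFT_REG_SIZE / NFT_REG32_SIZE) == 0)
		return reg / (NFT_REG_SIZE / NFT_REG32_SIZE);
	return reg - NFT_REG_SIZE / NFT_REG32_SIZE + NFT_REG32_00;
}

int main(void)
{
	/* NFT_REG_1 and NFT_REG32_00 alias the same data word, index 4 */
	assert(nl_to_internal(1) == 4);
	assert(nl_to_internal(NFT_REG32_00) == 4);
	/* 16-byte aligned indexes are dumped as the legacy 128-bit numbers */
	assert(internal_to_nl(4) == 1);
	assert(internal_to_nl(5) == NFT_REG32_00 + 1);
	return 0;
}

Because both encodings collapse onto the same internal index, nft_dump_register() reports 16-byte aligned words with the legacy register numbers, which is the compatibility behaviour its kerneldoc comment describes.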
// SPDX-License-Identifier: GPL-2.0-only #include <net/netdev_lock.h> #include "netlink.h" #include "common.h" #include "bitset.h" struct features_req_info { struct ethnl_req_info base; }; struct features_reply_data { struct ethnl_reply_data base; u32 hw[ETHTOOL_DEV_FEATURE_WORDS]; u32 wanted[ETHTOOL_DEV_FEATURE_WORDS]; u32 active[ETHTOOL_DEV_FEATURE_WORDS]; u32 nochange[ETHTOOL_DEV_FEATURE_WORDS]; u32 all[ETHTOOL_DEV_FEATURE_WORDS]; }; #define FEATURES_REPDATA(__reply_base) \ container_of(__reply_base, struct features_reply_data, base) const struct nla_policy ethnl_features_get_policy[] = { [ETHTOOL_A_FEATURES_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; static void ethnl_features_to_bitmap32(u32 *dest, netdev_features_t src) { unsigned int i; for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; i++) dest[i] = src >> (32 * i); } static int features_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct features_reply_data *data = FEATURES_REPDATA(reply_base); struct net_device *dev = reply_base->dev; netdev_features_t all_features; ethnl_features_to_bitmap32(data->hw, dev->hw_features); ethnl_features_to_bitmap32(data->wanted, dev->wanted_features); ethnl_features_to_bitmap32(data->active, dev->features); ethnl_features_to_bitmap32(data->nochange, NETIF_F_NEVER_CHANGE); all_features = GENMASK_ULL(NETDEV_FEATURE_COUNT - 1, 0); ethnl_features_to_bitmap32(data->all, all_features); return 0; } static int features_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct features_reply_data *data = FEATURES_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; unsigned int len = 0; int ret; ret = ethnl_bitset32_size(data->hw, data->all, NETDEV_FEATURE_COUNT, netdev_features_strings, compact); if (ret < 0) return ret; len += ret; ret = ethnl_bitset32_size(data->wanted, NULL, NETDEV_FEATURE_COUNT, netdev_features_strings, compact); if (ret < 0) return ret; len += ret; ret = ethnl_bitset32_size(data->active, NULL, NETDEV_FEATURE_COUNT, netdev_features_strings, compact); if (ret < 0) return ret; len += ret; ret = ethnl_bitset32_size(data->nochange, NULL, NETDEV_FEATURE_COUNT, netdev_features_strings, compact); if (ret < 0) return ret; len += ret; return len; } static int
features_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct features_reply_data *data = FEATURES_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; int ret; ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_HW, data->hw, data->all, NETDEV_FEATURE_COUNT, netdev_features_strings, compact); if (ret < 0) return ret; ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_WANTED, data->wanted, NULL, NETDEV_FEATURE_COUNT, netdev_features_strings, compact); if (ret < 0) return ret; ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_ACTIVE, data->active, NULL, NETDEV_FEATURE_COUNT, netdev_features_strings, compact); if (ret < 0) return ret; return ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_NOCHANGE, data->nochange, NULL, NETDEV_FEATURE_COUNT, netdev_features_strings, compact); } const struct ethnl_request_ops ethnl_features_request_ops = { .request_cmd = ETHTOOL_MSG_FEATURES_GET, .reply_cmd = ETHTOOL_MSG_FEATURES_GET_REPLY, .hdr_attr = ETHTOOL_A_FEATURES_HEADER, .req_info_size = sizeof(struct features_req_info), .reply_data_size = sizeof(struct features_reply_data), .prepare_data = features_prepare_data, .reply_size = features_reply_size, .fill_reply = features_fill_reply, }; /* FEATURES_SET */ const struct nla_policy ethnl_features_set_policy[] = { [ETHTOOL_A_FEATURES_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_FEATURES_WANTED] = { .type = NLA_NESTED }, }; static void ethnl_features_to_bitmap(unsigned long *dest, netdev_features_t val) { const unsigned int words = BITS_TO_LONGS(NETDEV_FEATURE_COUNT); unsigned int i; for (i = 0; i < words; i++) dest[i] = (unsigned long)(val >> (i * BITS_PER_LONG)); } static netdev_features_t ethnl_bitmap_to_features(unsigned long *src) { const unsigned int nft_bits = sizeof(netdev_features_t) * BITS_PER_BYTE; const unsigned int words = BITS_TO_LONGS(NETDEV_FEATURE_COUNT); netdev_features_t ret = 0; unsigned int i; for (i = 0; i < words; i++) ret |= (netdev_features_t)(src[i]) << (i * BITS_PER_LONG); ret &= ~(netdev_features_t)0 >> (nft_bits - NETDEV_FEATURE_COUNT); return ret; } static int features_send_reply(struct net_device *dev, struct genl_info *info, const unsigned long *wanted, const unsigned long *wanted_mask, const unsigned long *active, const unsigned long *active_mask, bool compact) { struct sk_buff *rskb; void *reply_payload; int reply_len = 0; int ret; reply_len = ethnl_reply_header_size(); ret = ethnl_bitset_size(wanted, wanted_mask, NETDEV_FEATURE_COUNT, netdev_features_strings, compact); if (ret < 0) goto err; reply_len += ret; ret = ethnl_bitset_size(active, active_mask, NETDEV_FEATURE_COUNT, netdev_features_strings, compact); if (ret < 0) goto err; reply_len += ret; ret = -ENOMEM; rskb = ethnl_reply_init(reply_len, dev, ETHTOOL_MSG_FEATURES_SET_REPLY, ETHTOOL_A_FEATURES_HEADER, info, &reply_payload); if (!rskb) goto err; ret = ethnl_put_bitset(rskb, ETHTOOL_A_FEATURES_WANTED, wanted, wanted_mask, NETDEV_FEATURE_COUNT, netdev_features_strings, compact); if (ret < 0) goto nla_put_failure; ret = ethnl_put_bitset(rskb, ETHTOOL_A_FEATURES_ACTIVE, active, active_mask, NETDEV_FEATURE_COUNT, netdev_features_strings, compact); if (ret < 0) goto nla_put_failure; genlmsg_end(rskb, reply_payload); ret = genlmsg_reply(rskb, info); return ret; nla_put_failure: nlmsg_free(rskb); WARN_ONCE(1, "calculated message payload length (%d) not sufficient\n", reply_len); err: GENL_SET_ERR_MSG(info, "failed to send reply message"); return ret; } int 
ethnl_set_features(struct sk_buff *skb, struct genl_info *info) { DECLARE_BITMAP(wanted_diff_mask, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(active_diff_mask, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(old_active, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(old_wanted, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(new_active, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(new_wanted, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(req_wanted, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(req_mask, NETDEV_FEATURE_COUNT); struct ethnl_req_info req_info = {}; struct nlattr **tb = info->attrs; struct net_device *dev; bool mod; int ret; if (!tb[ETHTOOL_A_FEATURES_WANTED]) return -EINVAL; ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_FEATURES_HEADER], genl_info_net(info), info->extack, true); if (ret < 0) return ret; dev = req_info.dev; rtnl_lock(); netdev_lock_ops(dev); ret = ethnl_ops_begin(dev); if (ret < 0) goto out_unlock; ethnl_features_to_bitmap(old_active, dev->features); ethnl_features_to_bitmap(old_wanted, dev->wanted_features); ret = ethnl_parse_bitset(req_wanted, req_mask, NETDEV_FEATURE_COUNT, tb[ETHTOOL_A_FEATURES_WANTED], netdev_features_strings, info->extack); if (ret < 0) goto out_ops; if (ethnl_bitmap_to_features(req_mask) & ~NETIF_F_ETHTOOL_BITS) { GENL_SET_ERR_MSG(info, "attempt to change non-ethtool features"); ret = -EINVAL; goto out_ops; } /* set req_wanted bits not in req_mask from old_wanted */ bitmap_and(req_wanted, req_wanted, req_mask, NETDEV_FEATURE_COUNT); bitmap_andnot(new_wanted, old_wanted, req_mask, NETDEV_FEATURE_COUNT); bitmap_or(req_wanted, new_wanted, req_wanted, NETDEV_FEATURE_COUNT); if (!bitmap_equal(req_wanted, old_wanted, NETDEV_FEATURE_COUNT)) { dev->wanted_features &= ~dev->hw_features; dev->wanted_features |= ethnl_bitmap_to_features(req_wanted) & dev->hw_features; __netdev_update_features(dev); } ethnl_features_to_bitmap(new_active, dev->features); mod = !bitmap_equal(old_active, new_active, NETDEV_FEATURE_COUNT); ret = 0; if (!(req_info.flags & ETHTOOL_FLAG_OMIT_REPLY)) { bool compact = req_info.flags & ETHTOOL_FLAG_COMPACT_BITSETS; bitmap_xor(wanted_diff_mask, req_wanted, new_active, NETDEV_FEATURE_COUNT); bitmap_xor(active_diff_mask, old_active, new_active, NETDEV_FEATURE_COUNT); bitmap_and(wanted_diff_mask, wanted_diff_mask, req_mask, NETDEV_FEATURE_COUNT); bitmap_and(req_wanted, req_wanted, wanted_diff_mask, NETDEV_FEATURE_COUNT); bitmap_and(new_active, new_active, active_diff_mask, NETDEV_FEATURE_COUNT); ret = features_send_reply(dev, info, req_wanted, wanted_diff_mask, new_active, active_diff_mask, compact); } if (mod) netdev_features_change(dev); out_ops: ethnl_ops_complete(dev); out_unlock: netdev_unlock_ops(dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info); return ret; } |
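ethnl_features_to_bitmap32() above splits the 64-bit netdev_features_t value into 32-bit netlink words, and ethnl_bitmap_to_features() reassembles a feature word while masking off bits beyond NETDEV_FEATURE_COUNT. The standalone sketch below is not part of the kernel sources; it replays that round trip, and FEATURE_COUNT is an illustrative value rather than the kernel's NETDEV_FEATURE_COUNT.

#include <assert.h>
#include <stdint.h>

#define FEATURE_COUNT	51			/* hypothetical number of feature bits */
#define FEATURE_WORDS	((FEATURE_COUNT + 31) / 32)

typedef uint64_t features_t;

/* split a feature word into u32 netlink words, lowest word first */
static void features_to_words(uint32_t *dest, features_t src)
{
	for (unsigned int i = 0; i < FEATURE_WORDS; i++)
		dest[i] = (uint32_t)(src >> (32 * i));
}

/* reassemble the feature word and drop bits beyond FEATURE_COUNT */
static features_t words_to_features(const uint32_t *src)
{
	features_t ret = 0;

	for (unsigned int i = 0; i < FEATURE_WORDS; i++)
		ret |= (features_t)src[i] << (32 * i);
	return ret & (~(features_t)0 >> (64 - FEATURE_COUNT));
}

int main(void)
{
	uint32_t words[FEATURE_WORDS];
	features_t f = ((features_t)1 << 40) | 0x5;	/* a few feature bits set */

	features_to_words(words, f);
	assert(words_to_features(words) == f);
	return 0;
}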
// SPDX-License-Identifier: GPL-2.0-only /* * Packet matching code. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/cache.h> #include <linux/capability.h> #include <linux/skbuff.h> #include <linux/kmod.h> #include <linux/vmalloc.h> #include <linux/netdevice.h> #include <linux/module.h> #include <net/ip.h> #include <net/compat.h> #include <linux/uaccess.h> #include <linux/mutex.h> #include <linux/proc_fs.h> #include <linux/err.h> #include <linux/cpumask.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <net/netfilter/nf_log.h> #include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv4 packet filter"); void *ipt_alloc_initial_table(const struct xt_table *info) { return xt_alloc_initial_table(ipt, IPT); } EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); /* Returns whether matches rule or not.
*/ /* Performance critical - called for every packet */ static inline bool ip_packet_match(const struct iphdr *ip, const char *indev, const char *outdev, const struct ipt_ip *ipinfo, int isfrag) { unsigned long ret; if (NF_INVF(ipinfo, IPT_INV_SRCIP, (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) || NF_INVF(ipinfo, IPT_INV_DSTIP, (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr)) return false; ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0)) return false; /* Check specific protocol */ if (ipinfo->proto && NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto)) return false; /* If we have a fragment rule but the packet is not a fragment * then we return zero */ if (NF_INVF(ipinfo, IPT_INV_FRAG, (ipinfo->flags & IPT_F_FRAG) && !isfrag)) return false; return true; } static bool ip_checkentry(const struct ipt_ip *ip) { if (ip->flags & ~IPT_F_MASK) return false; if (ip->invflags & ~IPT_INV_MASK) return false; return true; } static unsigned int ipt_error(struct sk_buff *skb, const struct xt_action_param *par) { net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); return NF_DROP; } /* Performance critical */ static inline struct ipt_entry * get_entry(const void *base, unsigned int offset) { return (struct ipt_entry *)(base + offset); } /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ static inline bool unconditional(const struct ipt_entry *e) { static const struct ipt_ip uncond; return e->target_offset == sizeof(struct ipt_entry) && memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; } /* for const-correctness */ static inline const struct xt_entry_target * ipt_get_target_c(const struct ipt_entry *e) { return ipt_get_target((struct ipt_entry *)e); } #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRAC |