Total coverage: 279779 (18%)of 1586992
8 8 8 6 6 6 17 11 8 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 // SPDX-License-Identifier: GPL-2.0 #include <linux/fs.h> #include <linux/sched.h> #include <linux/slab.h> #include "internal.h" #include "mount.h" static DEFINE_SPINLOCK(pin_lock); void pin_remove(struct fs_pin *pin) { spin_lock(&pin_lock); hlist_del_init(&pin->m_list); hlist_del_init(&pin->s_list); spin_unlock(&pin_lock); spin_lock_irq(&pin->wait.lock); pin->done = 1; wake_up_locked(&pin->wait); spin_unlock_irq(&pin->wait.lock); } void pin_insert(struct fs_pin *pin, struct vfsmount *m) { spin_lock(&pin_lock); hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins); hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins); spin_unlock(&pin_lock); } void pin_kill(struct fs_pin *p) { wait_queue_entry_t wait; if (!p) { rcu_read_unlock(); return; } init_wait(&wait); spin_lock_irq(&p->wait.lock); if (likely(!p->done)) { p->done = -1; spin_unlock_irq(&p->wait.lock); rcu_read_unlock(); p->kill(p); return; } if (p->done > 0) { spin_unlock_irq(&p->wait.lock); rcu_read_unlock(); return; } __add_wait_queue(&p->wait, &wait); while (1) { set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock_irq(&p->wait.lock); rcu_read_unlock(); schedule(); rcu_read_lock(); if (likely(list_empty(&wait.entry))) break; /* OK, we know p couldn't have been freed yet */ spin_lock_irq(&p->wait.lock); if (p->done > 0) { spin_unlock_irq(&p->wait.lock); break; } } rcu_read_unlock(); } void mnt_pin_kill(struct mount *m) { while (1) { struct hlist_node *p; rcu_read_lock(); p = READ_ONCE(m->mnt_pins.first); if (!p) { rcu_read_unlock(); break; } pin_kill(hlist_entry(p, struct fs_pin, m_list)); } } void group_pin_kill(struct hlist_head *p) { while (1) { struct hlist_node *q; rcu_read_lock(); q = READ_ONCE(p->first); if (!q) { rcu_read_unlock(); break; } pin_kill(hlist_entry(q, struct fs_pin, s_list)); } }
602 15 602 599 485 521 2014 484 2009 225 67 156 46 738 391 474 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 // SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/atomic.h> #include <linux/inetdevice.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_nat_masquerade.h> struct masq_dev_work { struct work_struct work; struct net *net; netns_tracker ns_tracker; union nf_inet_addr addr; int ifindex; int (*iter)(struct nf_conn *i, void *data); }; #define MAX_MASQ_WORKER_COUNT 16 static DEFINE_MUTEX(masq_mutex); static unsigned int masq_refcnt __read_mostly; static atomic_t masq_worker_count __read_mostly; unsigned int nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, const struct nf_nat_range2 *range, const struct net_device *out) { struct nf_conn *ct; struct nf_conn_nat *nat; enum ip_conntrack_info ctinfo; struct nf_nat_range2 newrange; const struct rtable *rt; __be32 newsrc, nh; WARN_ON(hooknum != NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY))); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. */ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) return NF_ACCEPT; rt = skb_rtable(skb); nh = rt_nexthop(rt, ip_hdr(skb)->daddr); newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE); if (!newsrc) { pr_info("%s ate my IP address\n", out->name); return NF_DROP; } nat = nf_ct_nat_ext_add(ct); if (nat) nat->masq_index = out->ifindex; /* Transfer from original range. */ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; newrange.min_addr.ip = newsrc; newrange.max_addr.ip = newsrc; newrange.min_proto = range->min_proto; newrange.max_proto = range->max_proto; /* Hand modified range to generic setup. */ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4); static void iterate_cleanup_work(struct work_struct *work) { struct nf_ct_iter_data iter_data = {}; struct masq_dev_work *w; w = container_of(work, struct masq_dev_work, work); iter_data.net = w->net; iter_data.data = (void *)w; nf_ct_iterate_cleanup_net(w->iter, &iter_data); put_net_track(w->net, &w->ns_tracker); kfree(w); atomic_dec(&masq_worker_count); module_put(THIS_MODULE); } /* Iterate conntrack table in the background and remove conntrack entries * that use the device/address being removed. * * In case too many work items have been queued already or memory allocation * fails iteration is skipped, conntrack entries will time out eventually. */ static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr, int ifindex, int (*iter)(struct nf_conn *i, void *data), gfp_t gfp_flags) { struct masq_dev_work *w; if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT) return; net = maybe_get_net(net); if (!net) return; if (!try_module_get(THIS_MODULE)) goto err_module; w = kzalloc(sizeof(*w), gfp_flags); if (w) { /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */ atomic_inc(&masq_worker_count); INIT_WORK(&w->work, iterate_cleanup_work); w->ifindex = ifindex; w->net = net; netns_tracker_alloc(net, &w->ns_tracker, gfp_flags); w->iter = iter; if (addr) w->addr = *addr; schedule_work(&w->work); return; } module_put(THIS_MODULE); err_module: put_net(net); } static int device_cmp(struct nf_conn *i, void *arg) { const struct nf_conn_nat *nat = nfct_nat(i); const struct masq_dev_work *w = arg; if (!nat) return 0; return nat->masq_index == w->ifindex; } static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { const struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_DOWN) { /* Device was downed. Search entire table for * conntracks which were associated with that device, * and forget them. */ nf_nat_masq_schedule(net, NULL, dev->ifindex, device_cmp, GFP_KERNEL); } return NOTIFY_DONE; } static int inet_cmp(struct nf_conn *ct, void *ptr) { struct nf_conntrack_tuple *tuple; struct masq_dev_work *w = ptr; if (!device_cmp(ct, ptr)) return 0; tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3); } static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { const struct in_ifaddr *ifa = ptr; const struct in_device *idev; const struct net_device *dev; union nf_inet_addr addr; if (event != NETDEV_DOWN) return NOTIFY_DONE; /* The masq_dev_notifier will catch the case of the device going * down. So if the inetdev is dead and being destroyed we have * no work to do. Otherwise this is an individual address removal * and we have to perform the flush. */ idev = ifa->ifa_dev; if (idev->dead) return NOTIFY_DONE; memset(&addr, 0, sizeof(addr)); addr.ip = ifa->ifa_address; dev = idev->dev; nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex, inet_cmp, GFP_KERNEL); return NOTIFY_DONE; } static struct notifier_block masq_dev_notifier = { .notifier_call = masq_device_event, }; static struct notifier_block masq_inet_notifier = { .notifier_call = masq_inet_event, }; #if IS_ENABLED(CONFIG_IPV6) static int nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr) { #ifdef CONFIG_IPV6_MODULE const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (!v6_ops) return -EHOSTUNREACH; return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr); #else return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr); #endif } unsigned int nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, const struct net_device *out) { enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; struct in6_addr src; struct nf_conn *ct; struct nf_nat_range2 newrange; ct = nf_ct_get(skb, &ctinfo); WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY))); if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out, &ipv6_hdr(skb)->daddr, 0, &src) < 0) return NF_DROP; nat = nf_ct_nat_ext_add(ct); if (nat) nat->masq_index = out->ifindex; newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; newrange.min_addr.in6 = src; newrange.max_addr.in6 = src; newrange.min_proto = range->min_proto; newrange.max_proto = range->max_proto; return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6); /* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep). * * Defer it to the system workqueue. * * As we can have 'a lot' of inet_events (depending on amount of ipv6 * addresses being deleted), we also need to limit work item queue. */ static int masq_inet6_event(struct notifier_block *this, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = ptr; const struct net_device *dev; union nf_inet_addr addr; if (event != NETDEV_DOWN) return NOTIFY_DONE; dev = ifa->idev->dev; memset(&addr, 0, sizeof(addr)); addr.in6 = ifa->addr; nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp, GFP_ATOMIC); return NOTIFY_DONE; } static struct notifier_block masq_inet6_notifier = { .notifier_call = masq_inet6_event, }; static int nf_nat_masquerade_ipv6_register_notifier(void) { return register_inet6addr_notifier(&masq_inet6_notifier); } #else static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; } #endif int nf_nat_masquerade_inet_register_notifiers(void) { int ret = 0; mutex_lock(&masq_mutex); if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) { ret = -EOVERFLOW; goto out_unlock; } /* check if the notifier was already set */ if (++masq_refcnt > 1) goto out_unlock; /* Register for device down reports */ ret = register_netdevice_notifier(&masq_dev_notifier); if (ret) goto err_dec; /* Register IP address change reports */ ret = register_inetaddr_notifier(&masq_inet_notifier); if (ret) goto err_unregister; ret = nf_nat_masquerade_ipv6_register_notifier(); if (ret) goto err_unreg_inet; mutex_unlock(&masq_mutex); return ret; err_unreg_inet: unregister_inetaddr_notifier(&masq_inet_notifier); err_unregister: unregister_netdevice_notifier(&masq_dev_notifier); err_dec: masq_refcnt--; out_unlock: mutex_unlock(&masq_mutex); return ret; } EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers); void nf_nat_masquerade_inet_unregister_notifiers(void) { mutex_lock(&masq_mutex); /* check if the notifiers still have clients */ if (--masq_refcnt > 0) goto out_unlock; unregister_netdevice_notifier(&masq_dev_notifier); unregister_inetaddr_notifier(&masq_inet_notifier); #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&masq_inet6_notifier); #endif out_unlock: mutex_unlock(&masq_mutex); } EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);
118 105 99 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * * Copyright (c) 2018 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Deepak Rawat <drawat@vmware.com> * Rob Clark <robdclark@gmail.com> * **************************************************************************/ #include <linux/export.h> #include <drm/drm_atomic.h> #include <drm/drm_damage_helper.h> #include <drm/drm_device.h> #include <drm/drm_framebuffer.h> static void convert_clip_rect_to_rect(const struct drm_clip_rect *src, struct drm_mode_rect *dest, uint32_t num_clips, uint32_t src_inc) { while (num_clips > 0) { dest->x1 = src->x1; dest->y1 = src->y1; dest->x2 = src->x2; dest->y2 = src->y2; src += src_inc; dest++; num_clips--; } } /** * drm_atomic_helper_check_plane_damage - Verify plane damage on atomic_check. * @state: The driver state object. * @plane_state: Plane state for which to verify damage. * * This helper function makes sure that damage from plane state is discarded * for full modeset. If there are more reasons a driver would want to do a full * plane update rather than processing individual damage regions, then those * cases should be taken care of here. * * Note that &drm_plane_state.fb_damage_clips == NULL in plane state means that * full plane update should happen. It also ensure helper iterator will return * &drm_plane_state.src as damage. */ void drm_atomic_helper_check_plane_damage(struct drm_atomic_state *state, struct drm_plane_state *plane_state) { struct drm_crtc_state *crtc_state; if (plane_state->crtc) { crtc_state = drm_atomic_get_new_crtc_state(state, plane_state->crtc); if (WARN_ON(!crtc_state)) return; if (drm_atomic_crtc_needs_modeset(crtc_state)) { drm_property_blob_put(plane_state->fb_damage_clips); plane_state->fb_damage_clips = NULL; } } } EXPORT_SYMBOL(drm_atomic_helper_check_plane_damage); /** * drm_atomic_helper_dirtyfb - Helper for dirtyfb. * @fb: DRM framebuffer. * @file_priv: Drm file for the ioctl call. * @flags: Dirty fb annotate flags. * @color: Color for annotate fill. * @clips: Dirty region. * @num_clips: Count of clip in clips. * * A helper to implement &drm_framebuffer_funcs.dirty using damage interface * during plane update. If num_clips is 0 then this helper will do a full plane * update. This is the same behaviour expected by DIRTFB IOCTL. * * Note that this helper is blocking implementation. This is what current * drivers and userspace expect in their DIRTYFB IOCTL implementation, as a way * to rate-limit userspace and make sure its rendering doesn't get ahead of * uploading new data too much. * * Return: Zero on success, negative errno on failure. */ int drm_atomic_helper_dirtyfb(struct drm_framebuffer *fb, struct drm_file *file_priv, unsigned int flags, unsigned int color, struct drm_clip_rect *clips, unsigned int num_clips) { struct drm_modeset_acquire_ctx ctx; struct drm_property_blob *damage = NULL; struct drm_mode_rect *rects = NULL; struct drm_atomic_state *state; struct drm_plane *plane; int ret = 0; /* * When called from ioctl, we are interruptible, but not when called * internally (ie. defio worker) */ drm_modeset_acquire_init(&ctx, file_priv ? DRM_MODESET_ACQUIRE_INTERRUPTIBLE : 0); state = drm_atomic_state_alloc(fb->dev); if (!state) { ret = -ENOMEM; goto out_drop_locks; } state->acquire_ctx = &ctx; if (clips) { uint32_t inc = 1; if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY) { inc = 2; num_clips /= 2; } rects = kcalloc(num_clips, sizeof(*rects), GFP_KERNEL); if (!rects) { ret = -ENOMEM; goto out; } convert_clip_rect_to_rect(clips, rects, num_clips, inc); damage = drm_property_create_blob(fb->dev, num_clips * sizeof(*rects), rects); if (IS_ERR(damage)) { ret = PTR_ERR(damage); damage = NULL; goto out; } } retry: drm_for_each_plane(plane, fb->dev) { struct drm_plane_state *plane_state; ret = drm_modeset_lock(&plane->mutex, state->acquire_ctx); if (ret) goto out; if (plane->state->fb != fb) { drm_modeset_unlock(&plane->mutex); continue; } plane_state = drm_atomic_get_plane_state(state, plane); if (IS_ERR(plane_state)) { ret = PTR_ERR(plane_state); goto out; } drm_property_replace_blob(&plane_state->fb_damage_clips, damage); } ret = drm_atomic_commit(state); out: if (ret == -EDEADLK) { drm_atomic_state_clear(state); ret = drm_modeset_backoff(&ctx); if (!ret) goto retry; } drm_property_blob_put(damage); kfree(rects); drm_atomic_state_put(state); out_drop_locks: drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); return ret; } EXPORT_SYMBOL(drm_atomic_helper_dirtyfb); /** * drm_atomic_helper_damage_iter_init - Initialize the damage iterator. * @iter: The iterator to initialize. * @old_state: Old plane state for validation. * @state: Plane state from which to iterate the damage clips. * * Initialize an iterator, which clips plane damage * &drm_plane_state.fb_damage_clips to plane &drm_plane_state.src. This iterator * returns full plane src in case damage is not present because either * user-space didn't sent or driver discarded it (it want to do full plane * update). Currently this iterator returns full plane src in case plane src * changed but that can be changed in future to return damage. * * For the case when plane is not visible or plane update should not happen the * first call to iter_next will return false. Note that this helper use clipped * &drm_plane_state.src, so driver calling this helper should have called * drm_atomic_helper_check_plane_state() earlier. */ void drm_atomic_helper_damage_iter_init(struct drm_atomic_helper_damage_iter *iter, const struct drm_plane_state *old_state, const struct drm_plane_state *state) { struct drm_rect src; memset(iter, 0, sizeof(*iter)); if (!state || !state->crtc || !state->fb || !state->visible) return; iter->clips = (struct drm_rect *)drm_plane_get_damage_clips(state); iter->num_clips = drm_plane_get_damage_clips_count(state); /* Round down for x1/y1 and round up for x2/y2 to catch all pixels */ src = drm_plane_state_src(state); iter->plane_src.x1 = src.x1 >> 16; iter->plane_src.y1 = src.y1 >> 16; iter->plane_src.x2 = (src.x2 >> 16) + !!(src.x2 & 0xFFFF); iter->plane_src.y2 = (src.y2 >> 16) + !!(src.y2 & 0xFFFF); if (!iter->clips || state->ignore_damage_clips || !drm_rect_equals(&state->src, &old_state->src)) { iter->clips = NULL; iter->num_clips = 0; iter->full_update = true; } } EXPORT_SYMBOL(drm_atomic_helper_damage_iter_init); /** * drm_atomic_helper_damage_iter_next - Advance the damage iterator. * @iter: The iterator to advance. * @rect: Return a rectangle in fb coordinate clipped to plane src. * * Since plane src is in 16.16 fixed point and damage clips are whole number, * this iterator round off clips that intersect with plane src. Round down for * x1/y1 and round up for x2/y2 for the intersected coordinate. Similar rounding * off for full plane src, in case it's returned as damage. This iterator will * skip damage clips outside of plane src. * * Return: True if the output is valid, false if reached the end. * * If the first call to iterator next returns false then it means no need to * update the plane. */ bool drm_atomic_helper_damage_iter_next(struct drm_atomic_helper_damage_iter *iter, struct drm_rect *rect) { bool ret = false; if (iter->full_update) { *rect = iter->plane_src; iter->full_update = false; return true; } while (iter->curr_clip < iter->num_clips) { *rect = iter->clips[iter->curr_clip]; iter->curr_clip++; if (drm_rect_intersect(rect, &iter->plane_src)) { ret = true; break; } } return ret; } EXPORT_SYMBOL(drm_atomic_helper_damage_iter_next); /** * drm_atomic_helper_damage_merged - Merged plane damage * @old_state: Old plane state for validation. * @state: Plane state from which to iterate the damage clips. * @rect: Returns the merged damage rectangle * * This function merges any valid plane damage clips into one rectangle and * returns it in @rect. * * For details see: drm_atomic_helper_damage_iter_init() and * drm_atomic_helper_damage_iter_next(). * * Returns: * True if there is valid plane damage otherwise false. */ bool drm_atomic_helper_damage_merged(const struct drm_plane_state *old_state, const struct drm_plane_state *state, struct drm_rect *rect) { struct drm_atomic_helper_damage_iter iter; struct drm_rect clip; bool valid = false; rect->x1 = INT_MAX; rect->y1 = INT_MAX; rect->x2 = 0; rect->y2 = 0; drm_atomic_helper_damage_iter_init(&iter, old_state, state); drm_atomic_for_each_plane_damage(&iter, &clip) { rect->x1 = min(rect->x1, clip.x1); rect->y1 = min(rect->y1, clip.y1); rect->x2 = max(rect->x2, clip.x2); rect->y2 = max(rect->y2, clip.y2); valid = true; } return valid; } EXPORT_SYMBOL(drm_atomic_helper_damage_merged);
1 6 6 5 4 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 // SPDX-License-Identifier: GPL-2.0+ // // handle em28xx IR remotes via linux kernel input layer. // // Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it> // Markus Rechberger <mrechberger@gmail.com> // Mauro Carvalho Chehab <mchehab@kernel.org> // Sascha Sommer <saschasommer@freenet.de> #include "em28xx.h" #include <linux/module.h> #include <linux/init.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/usb.h> #include <linux/usb/input.h> #include <linux/slab.h> #include <linux/bitrev.h> #define EM28XX_SNAPSHOT_KEY KEY_CAMERA #define EM28XX_BUTTONS_DEBOUNCED_QUERY_INTERVAL 500 /* [ms] */ #define EM28XX_BUTTONS_VOLATILE_QUERY_INTERVAL 100 /* [ms] */ static unsigned int ir_debug; module_param(ir_debug, int, 0644); MODULE_PARM_DESC(ir_debug, "enable debug messages [IR]"); #define MODULE_NAME "em28xx" #define dprintk(fmt, arg...) do { \ if (ir_debug) \ dev_printk(KERN_DEBUG, &ir->dev->intf->dev, \ "input: %s: " fmt, __func__, ## arg); \ } while (0) /* * Polling structure used by em28xx IR's */ struct em28xx_ir_poll_result { unsigned int toggle_bit:1; unsigned int read_count:7; enum rc_proto protocol; u32 scancode; }; struct em28xx_IR { struct em28xx *dev; struct rc_dev *rc; char phys[32]; /* poll decoder */ int polling; struct delayed_work work; unsigned int full_code:1; unsigned int last_readcount; u64 rc_proto; struct i2c_client *i2c_client; int (*get_key_i2c)(struct i2c_client *ir, enum rc_proto *protocol, u32 *scancode); int (*get_key)(struct em28xx_IR *ir, struct em28xx_ir_poll_result *r); }; /* * I2C IR based get keycodes - should be used with ir-kbd-i2c */ static int em28xx_get_key_terratec(struct i2c_client *i2c_dev, enum rc_proto *protocol, u32 *scancode) { int rc; unsigned char b; /* poll IR chip */ rc = i2c_master_recv(i2c_dev, &b, 1); if (rc != 1) { if (rc < 0) return rc; return -EIO; } /* * it seems that 0xFE indicates that a button is still hold * down, while 0xff indicates that no button is hold down. */ if (b == 0xff) return 0; if (b == 0xfe) /* keep old data */ return 1; *protocol = RC_PROTO_UNKNOWN; *scancode = b; return 1; } static int em28xx_get_key_em_haup(struct i2c_client *i2c_dev, enum rc_proto *protocol, u32 *scancode) { unsigned char buf[2]; int size; /* poll IR chip */ size = i2c_master_recv(i2c_dev, buf, sizeof(buf)); if (size != 2) return -EIO; /* Does eliminate repeated parity code */ if (buf[1] == 0xff) return 0; /* * Rearranges bits to the right order. * The bit order were determined experimentally by using * The original Hauppauge Grey IR and another RC5 that uses addr=0x08 * The RC5 code has 14 bits, but we've experimentally determined * the meaning for only 11 bits. * So, the code translation is not complete. Yet, it is enough to * work with the provided RC5 IR. */ *protocol = RC_PROTO_RC5; *scancode = (bitrev8(buf[1]) & 0x1f) << 8 | bitrev8(buf[0]) >> 2; return 1; } static int em28xx_get_key_pinnacle_usb_grey(struct i2c_client *i2c_dev, enum rc_proto *protocol, u32 *scancode) { unsigned char buf[3]; /* poll IR chip */ if (i2c_master_recv(i2c_dev, buf, 3) != 3) return -EIO; if (buf[0] != 0x00) return 0; *protocol = RC_PROTO_UNKNOWN; *scancode = buf[2] & 0x3f; return 1; } static int em28xx_get_key_winfast_usbii_deluxe(struct i2c_client *i2c_dev, enum rc_proto *protocol, u32 *scancode) { unsigned char subaddr, keydetect, key; struct i2c_msg msg[] = { { .addr = i2c_dev->addr, .flags = 0, .buf = &subaddr, .len = 1 }, { .addr = i2c_dev->addr, .flags = I2C_M_RD, .buf = &keydetect, .len = 1 } }; subaddr = 0x10; if (i2c_transfer(i2c_dev->adapter, msg, 2) != 2) return -EIO; if (keydetect == 0x00) return 0; subaddr = 0x00; msg[1].buf = &key; if (i2c_transfer(i2c_dev->adapter, msg, 2) != 2) return -EIO; if (key == 0x00) return 0; *protocol = RC_PROTO_UNKNOWN; *scancode = key; return 1; } /* * Poll based get keycode functions */ /* This is for the em2860/em2880 */ static int default_polling_getkey(struct em28xx_IR *ir, struct em28xx_ir_poll_result *poll_result) { struct em28xx *dev = ir->dev; int rc; u8 msg[3] = { 0, 0, 0 }; /* * Read key toggle, brand, and key code * on registers 0x45, 0x46 and 0x47 */ rc = dev->em28xx_read_reg_req_len(dev, 0, EM28XX_R45_IR, msg, sizeof(msg)); if (rc < 0) return rc; /* Infrared toggle (Reg 0x45[7]) */ poll_result->toggle_bit = (msg[0] >> 7); /* Infrared read count (Reg 0x45[6:0] */ poll_result->read_count = (msg[0] & 0x7f); /* Remote Control Address/Data (Regs 0x46/0x47) */ switch (ir->rc_proto) { case RC_PROTO_BIT_RC5: poll_result->protocol = RC_PROTO_RC5; poll_result->scancode = RC_SCANCODE_RC5(msg[1], msg[2]); break; case RC_PROTO_BIT_NEC: poll_result->protocol = RC_PROTO_NEC; poll_result->scancode = RC_SCANCODE_NEC(msg[1], msg[2]); break; default: poll_result->protocol = RC_PROTO_UNKNOWN; poll_result->scancode = msg[1] << 8 | msg[2]; break; } return 0; } static int em2874_polling_getkey(struct em28xx_IR *ir, struct em28xx_ir_poll_result *poll_result) { struct em28xx *dev = ir->dev; int rc; u8 msg[5] = { 0, 0, 0, 0, 0 }; /* * Read key toggle, brand, and key code * on registers 0x51-55 */ rc = dev->em28xx_read_reg_req_len(dev, 0, EM2874_R51_IR, msg, sizeof(msg)); if (rc < 0) return rc; /* Infrared toggle (Reg 0x51[7]) */ poll_result->toggle_bit = (msg[0] >> 7); /* Infrared read count (Reg 0x51[6:0] */ poll_result->read_count = (msg[0] & 0x7f); /* * Remote Control Address (Reg 0x52) * Remote Control Data (Reg 0x53-0x55) */ switch (ir->rc_proto) { case RC_PROTO_BIT_RC5: poll_result->protocol = RC_PROTO_RC5; poll_result->scancode = RC_SCANCODE_RC5(msg[1], msg[2]); break; case RC_PROTO_BIT_NEC: poll_result->scancode = ir_nec_bytes_to_scancode(msg[1], msg[2], msg[3], msg[4], &poll_result->protocol); break; case RC_PROTO_BIT_RC6_0: poll_result->protocol = RC_PROTO_RC6_0; poll_result->scancode = RC_SCANCODE_RC6_0(msg[1], msg[2]); break; default: poll_result->protocol = RC_PROTO_UNKNOWN; poll_result->scancode = (msg[1] << 24) | (msg[2] << 16) | (msg[3] << 8) | msg[4]; break; } return 0; } /* * Polling code for em28xx */ static int em28xx_i2c_ir_handle_key(struct em28xx_IR *ir) { static u32 scancode; enum rc_proto protocol; int rc; rc = ir->get_key_i2c(ir->i2c_client, &protocol, &scancode); if (rc < 0) { dprintk("ir->get_key_i2c() failed: %d\n", rc); return rc; } if (rc) { dprintk("%s: proto = 0x%04x, scancode = 0x%04x\n", __func__, protocol, scancode); rc_keydown(ir->rc, protocol, scancode, 0); } return 0; } static void em28xx_ir_handle_key(struct em28xx_IR *ir) { int result; struct em28xx_ir_poll_result poll_result; /* read the registers containing the IR status */ result = ir->get_key(ir, &poll_result); if (unlikely(result < 0)) { dprintk("ir->get_key() failed: %d\n", result); return; } if (unlikely(poll_result.read_count != ir->last_readcount)) { dprintk("%s: toggle: %d, count: %d, key 0x%04x\n", __func__, poll_result.toggle_bit, poll_result.read_count, poll_result.scancode); if (ir->full_code) rc_keydown(ir->rc, poll_result.protocol, poll_result.scancode, poll_result.toggle_bit); else rc_keydown(ir->rc, RC_PROTO_UNKNOWN, poll_result.scancode & 0xff, poll_result.toggle_bit); if (ir->dev->chip_id == CHIP_ID_EM2874 || ir->dev->chip_id == CHIP_ID_EM2884) /* * The em2874 clears the readcount field every time the * register is read. The em2860/2880 datasheet says * that it is supposed to clear the readcount, but it * doesn't. So with the em2874, we are looking for a * non-zero read count as opposed to a readcount * that is incrementing */ ir->last_readcount = 0; else ir->last_readcount = poll_result.read_count; } } static void em28xx_ir_work(struct work_struct *work) { struct em28xx_IR *ir = container_of(work, struct em28xx_IR, work.work); if (ir->i2c_client) /* external i2c device */ em28xx_i2c_ir_handle_key(ir); else /* internal device */ em28xx_ir_handle_key(ir); schedule_delayed_work(&ir->work, msecs_to_jiffies(ir->polling)); } static int em28xx_ir_start(struct rc_dev *rc) { struct em28xx_IR *ir = rc->priv; INIT_DELAYED_WORK(&ir->work, em28xx_ir_work); schedule_delayed_work(&ir->work, 0); return 0; } static void em28xx_ir_stop(struct rc_dev *rc) { struct em28xx_IR *ir = rc->priv; cancel_delayed_work_sync(&ir->work); } static int em2860_ir_change_protocol(struct rc_dev *rc_dev, u64 *rc_proto) { struct em28xx_IR *ir = rc_dev->priv; struct em28xx *dev = ir->dev; /* Adjust xclk based on IR table for RC5/NEC tables */ if (*rc_proto & RC_PROTO_BIT_RC5) { dev->board.xclk |= EM28XX_XCLK_IR_RC5_MODE; ir->full_code = 1; *rc_proto = RC_PROTO_BIT_RC5; } else if (*rc_proto & RC_PROTO_BIT_NEC) { dev->board.xclk &= ~EM28XX_XCLK_IR_RC5_MODE; ir->full_code = 1; *rc_proto = RC_PROTO_BIT_NEC; } else if (*rc_proto & RC_PROTO_BIT_UNKNOWN) { *rc_proto = RC_PROTO_BIT_UNKNOWN; } else { *rc_proto = ir->rc_proto; return -EINVAL; } em28xx_write_reg_bits(dev, EM28XX_R0F_XCLK, dev->board.xclk, EM28XX_XCLK_IR_RC5_MODE); ir->rc_proto = *rc_proto; return 0; } static int em2874_ir_change_protocol(struct rc_dev *rc_dev, u64 *rc_proto) { struct em28xx_IR *ir = rc_dev->priv; struct em28xx *dev = ir->dev; u8 ir_config = EM2874_IR_RC5; /* Adjust xclk and set type based on IR table for RC5/NEC/RC6 tables */ if (*rc_proto & RC_PROTO_BIT_RC5) { dev->board.xclk |= EM28XX_XCLK_IR_RC5_MODE; ir->full_code = 1; *rc_proto = RC_PROTO_BIT_RC5; } else if (*rc_proto & RC_PROTO_BIT_NEC) { dev->board.xclk &= ~EM28XX_XCLK_IR_RC5_MODE; ir_config = EM2874_IR_NEC | EM2874_IR_NEC_NO_PARITY; ir->full_code = 1; *rc_proto = RC_PROTO_BIT_NEC; } else if (*rc_proto & RC_PROTO_BIT_RC6_0) { dev->board.xclk |= EM28XX_XCLK_IR_RC5_MODE; ir_config = EM2874_IR_RC6_MODE_0; ir->full_code = 1; *rc_proto = RC_PROTO_BIT_RC6_0; } else if (*rc_proto & RC_PROTO_BIT_UNKNOWN) { *rc_proto = RC_PROTO_BIT_UNKNOWN; } else { *rc_proto = ir->rc_proto; return -EINVAL; } em28xx_write_regs(dev, EM2874_R50_IR_CONFIG, &ir_config, 1); em28xx_write_reg_bits(dev, EM28XX_R0F_XCLK, dev->board.xclk, EM28XX_XCLK_IR_RC5_MODE); ir->rc_proto = *rc_proto; return 0; } static int em28xx_ir_change_protocol(struct rc_dev *rc_dev, u64 *rc_proto) { struct em28xx_IR *ir = rc_dev->priv; struct em28xx *dev = ir->dev; /* Setup the proper handler based on the chip */ switch (dev->chip_id) { case CHIP_ID_EM2860: case CHIP_ID_EM2883: return em2860_ir_change_protocol(rc_dev, rc_proto); case CHIP_ID_EM2884: case CHIP_ID_EM2874: case CHIP_ID_EM28174: case CHIP_ID_EM28178: return em2874_ir_change_protocol(rc_dev, rc_proto); default: dev_err(&ir->dev->intf->dev, "Unrecognized em28xx chip id 0x%02x: IR not supported\n", dev->chip_id); return -EINVAL; } } static int em28xx_probe_i2c_ir(struct em28xx *dev) { int i = 0; /* * Leadtek winfast tv USBII deluxe can find a non working IR-device * at address 0x18, so if that address is needed for another board in * the future, please put it after 0x1f. */ static const unsigned short addr_list[] = { 0x1f, 0x30, 0x47, I2C_CLIENT_END }; while (addr_list[i] != I2C_CLIENT_END) { if (i2c_probe_func_quick_read(&dev->i2c_adap[dev->def_i2c_bus], addr_list[i]) == 1) return addr_list[i]; i++; } return -ENODEV; } /* * Handle buttons */ static void em28xx_query_buttons(struct work_struct *work) { struct em28xx *dev = container_of(work, struct em28xx, buttons_query_work.work); u8 i, j; int regval; bool is_pressed, was_pressed; const struct em28xx_led *led; /* Poll and evaluate all addresses */ for (i = 0; i < dev->num_button_polling_addresses; i++) { /* Read value from register */ regval = em28xx_read_reg(dev, dev->button_polling_addresses[i]); if (regval < 0) continue; /* Check states of the buttons and act */ j = 0; while (dev->board.buttons[j].role >= 0 && dev->board.buttons[j].role < EM28XX_NUM_BUTTON_ROLES) { const struct em28xx_button *button; button = &dev->board.buttons[j]; /* Check if button uses the current address */ if (button->reg_r != dev->button_polling_addresses[i]) { j++; continue; } /* Determine if button is and was pressed last time */ is_pressed = regval & button->mask; was_pressed = dev->button_polling_last_values[i] & button->mask; if (button->inverted) { is_pressed = !is_pressed; was_pressed = !was_pressed; } /* Clear button state (if needed) */ if (is_pressed && button->reg_clearing) em28xx_write_reg(dev, button->reg_clearing, (~regval & button->mask) | (regval & ~button->mask)); /* Handle button state */ if (!is_pressed || was_pressed) { j++; continue; } switch (button->role) { case EM28XX_BUTTON_SNAPSHOT: /* Emulate the keypress */ input_report_key(dev->sbutton_input_dev, EM28XX_SNAPSHOT_KEY, 1); /* Unpress the key */ input_report_key(dev->sbutton_input_dev, EM28XX_SNAPSHOT_KEY, 0); break; case EM28XX_BUTTON_ILLUMINATION: led = em28xx_find_led(dev, EM28XX_LED_ILLUMINATION); /* Switch illumination LED on/off */ if (led) em28xx_toggle_reg_bits(dev, led->gpio_reg, led->gpio_mask); break; default: WARN_ONCE(1, "BUG: unhandled button role."); } /* Next button */ j++; } /* Save current value for comparison during the next polling */ dev->button_polling_last_values[i] = regval; } /* Schedule next poll */ schedule_delayed_work(&dev->buttons_query_work, msecs_to_jiffies(dev->button_polling_interval)); } static int em28xx_register_snapshot_button(struct em28xx *dev) { struct usb_device *udev = interface_to_usbdev(dev->intf); struct input_dev *input_dev; int err; dev_info(&dev->intf->dev, "Registering snapshot button...\n"); input_dev = input_allocate_device(); if (!input_dev) return -ENOMEM; usb_make_path(udev, dev->snapshot_button_path, sizeof(dev->snapshot_button_path)); strlcat(dev->snapshot_button_path, "/sbutton", sizeof(dev->snapshot_button_path)); input_dev->name = "em28xx snapshot button"; input_dev->phys = dev->snapshot_button_path; input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP); set_bit(EM28XX_SNAPSHOT_KEY, input_dev->keybit); input_dev->keycodesize = 0; input_dev->keycodemax = 0; usb_to_input_id(udev, &input_dev->id); input_dev->dev.parent = &dev->intf->dev; err = input_register_device(input_dev); if (err) { dev_err(&dev->intf->dev, "input_register_device failed\n"); input_free_device(input_dev); return err; } dev->sbutton_input_dev = input_dev; return 0; } static void em28xx_init_buttons(struct em28xx *dev) { u8 i = 0, j = 0; bool addr_new = false; dev->button_polling_interval = EM28XX_BUTTONS_DEBOUNCED_QUERY_INTERVAL; while (dev->board.buttons[i].role >= 0 && dev->board.buttons[i].role < EM28XX_NUM_BUTTON_ROLES) { const struct em28xx_button *button = &dev->board.buttons[i]; /* Check if polling address is already on the list */ addr_new = true; for (j = 0; j < dev->num_button_polling_addresses; j++) { if (button->reg_r == dev->button_polling_addresses[j]) { addr_new = false; break; } } /* Check if max. number of polling addresses is exceeded */ if (addr_new && dev->num_button_polling_addresses >= EM28XX_NUM_BUTTON_ADDRESSES_MAX) { WARN_ONCE(1, "BUG: maximum number of button polling addresses exceeded."); goto next_button; } /* Button role specific checks and actions */ if (button->role == EM28XX_BUTTON_SNAPSHOT) { /* Register input device */ if (em28xx_register_snapshot_button(dev) < 0) goto next_button; } else if (button->role == EM28XX_BUTTON_ILLUMINATION) { /* Check sanity */ if (!em28xx_find_led(dev, EM28XX_LED_ILLUMINATION)) { dev_err(&dev->intf->dev, "BUG: illumination button defined, but no illumination LED.\n"); goto next_button; } } /* Add read address to list of polling addresses */ if (addr_new) { unsigned int index = dev->num_button_polling_addresses; dev->button_polling_addresses[index] = button->reg_r; dev->num_button_polling_addresses++; } /* Reduce polling interval if necessary */ if (!button->reg_clearing) dev->button_polling_interval = EM28XX_BUTTONS_VOLATILE_QUERY_INTERVAL; next_button: /* Next button */ i++; } /* Start polling */ if (dev->num_button_polling_addresses) { memset(dev->button_polling_last_values, 0, EM28XX_NUM_BUTTON_ADDRESSES_MAX); schedule_delayed_work(&dev->buttons_query_work, msecs_to_jiffies(dev->button_polling_interval)); } } static void em28xx_shutdown_buttons(struct em28xx *dev) { /* Cancel polling */ cancel_delayed_work_sync(&dev->buttons_query_work); /* Clear polling addresses list */ dev->num_button_polling_addresses = 0; /* Deregister input devices */ if (dev->sbutton_input_dev) { dev_info(&dev->intf->dev, "Deregistering snapshot button\n"); input_unregister_device(dev->sbutton_input_dev); dev->sbutton_input_dev = NULL; } } static int em28xx_ir_init(struct em28xx *dev) { struct usb_device *udev = interface_to_usbdev(dev->intf); struct em28xx_IR *ir; struct rc_dev *rc; int err = -ENOMEM; u64 rc_proto; u16 i2c_rc_dev_addr = 0; if (dev->is_audio_only) { /* Shouldn't initialize IR for this interface */ return 0; } kref_get(&dev->ref); INIT_DELAYED_WORK(&dev->buttons_query_work, em28xx_query_buttons); if (dev->board.buttons) em28xx_init_buttons(dev); if (dev->board.has_ir_i2c) { i2c_rc_dev_addr = em28xx_probe_i2c_ir(dev); if (!i2c_rc_dev_addr) { dev->board.has_ir_i2c = 0; dev_warn(&dev->intf->dev, "No i2c IR remote control device found.\n"); err = -ENODEV; goto ref_put; } } if (!dev->board.ir_codes && !dev->board.has_ir_i2c) { /* No remote control support */ dev_warn(&dev->intf->dev, "Remote control support is not available for this card.\n"); return 0; } dev_info(&dev->intf->dev, "Registering input extension\n"); ir = kzalloc(sizeof(*ir), GFP_KERNEL); if (!ir) goto ref_put; rc = rc_allocate_device(RC_DRIVER_SCANCODE); if (!rc) goto error; /* record handles to ourself */ ir->dev = dev; dev->ir = ir; ir->rc = rc; rc->priv = ir; rc->open = em28xx_ir_start; rc->close = em28xx_ir_stop; if (dev->board.has_ir_i2c) { /* external i2c device */ switch (dev->model) { case EM2800_BOARD_TERRATEC_CINERGY_200: case EM2820_BOARD_TERRATEC_CINERGY_250: rc->map_name = RC_MAP_EM_TERRATEC; ir->get_key_i2c = em28xx_get_key_terratec; break; case EM2820_BOARD_PINNACLE_USB_2: rc->map_name = RC_MAP_PINNACLE_GREY; ir->get_key_i2c = em28xx_get_key_pinnacle_usb_grey; break; case EM2820_BOARD_HAUPPAUGE_WINTV_USB_2: rc->map_name = RC_MAP_HAUPPAUGE; ir->get_key_i2c = em28xx_get_key_em_haup; rc->allowed_protocols = RC_PROTO_BIT_RC5; break; case EM2820_BOARD_LEADTEK_WINFAST_USBII_DELUXE: rc->map_name = RC_MAP_WINFAST_USBII_DELUXE; ir->get_key_i2c = em28xx_get_key_winfast_usbii_deluxe; break; default: err = -ENODEV; goto error; } ir->i2c_client = kzalloc(sizeof(*ir->i2c_client), GFP_KERNEL); if (!ir->i2c_client) goto error; ir->i2c_client->adapter = &ir->dev->i2c_adap[dev->def_i2c_bus]; ir->i2c_client->addr = i2c_rc_dev_addr; ir->i2c_client->flags = 0; /* NOTE: all other fields of i2c_client are unused */ } else { /* internal device */ switch (dev->chip_id) { case CHIP_ID_EM2860: case CHIP_ID_EM2883: rc->allowed_protocols = RC_PROTO_BIT_RC5 | RC_PROTO_BIT_NEC; ir->get_key = default_polling_getkey; break; case CHIP_ID_EM2884: case CHIP_ID_EM2874: case CHIP_ID_EM28174: case CHIP_ID_EM28178: ir->get_key = em2874_polling_getkey; rc->allowed_protocols = RC_PROTO_BIT_RC5 | RC_PROTO_BIT_NEC | RC_PROTO_BIT_NECX | RC_PROTO_BIT_NEC32 | RC_PROTO_BIT_RC6_0; break; default: err = -ENODEV; goto error; } rc->change_protocol = em28xx_ir_change_protocol; rc->map_name = dev->board.ir_codes; /* By default, keep protocol field untouched */ rc_proto = RC_PROTO_BIT_UNKNOWN; err = em28xx_ir_change_protocol(rc, &rc_proto); if (err) goto error; } /* This is how often we ask the chip for IR information */ ir->polling = 100; /* ms */ usb_make_path(udev, ir->phys, sizeof(ir->phys)); strlcat(ir->phys, "/input0", sizeof(ir->phys)); rc->device_name = em28xx_boards[dev->model].name; rc->input_phys = ir->phys; usb_to_input_id(udev, &rc->input_id); rc->dev.parent = &dev->intf->dev; rc->driver_name = MODULE_NAME; /* all done */ err = rc_register_device(rc); if (err) goto error; dev_info(&dev->intf->dev, "Input extension successfully initialized\n"); return 0; error: kfree(ir->i2c_client); dev->ir = NULL; rc_free_device(rc); kfree(ir); ref_put: em28xx_shutdown_buttons(dev); return err; } static int em28xx_ir_fini(struct em28xx *dev) { struct em28xx_IR *ir = dev->ir; if (dev->is_audio_only) { /* Shouldn't initialize IR for this interface */ return 0; } dev_info(&dev->intf->dev, "Closing input extension\n"); em28xx_shutdown_buttons(dev); /* skip detach on non attached boards */ if (!ir) goto ref_put; rc_unregister_device(ir->rc); kfree(ir->i2c_client); /* done */ kfree(ir); dev->ir = NULL; ref_put: kref_put(&dev->ref, em28xx_free_device); return 0; } static int em28xx_ir_suspend(struct em28xx *dev) { struct em28xx_IR *ir = dev->ir; if (dev->is_audio_only) return 0; dev_info(&dev->intf->dev, "Suspending input extension\n"); if (ir) cancel_delayed_work_sync(&ir->work); cancel_delayed_work_sync(&dev->buttons_query_work); /* * is canceling delayed work sufficient or does the rc event * kthread needs stopping? kthread is stopped in * ir_raw_event_unregister() */ return 0; } static int em28xx_ir_resume(struct em28xx *dev) { struct em28xx_IR *ir = dev->ir; if (dev->is_audio_only) return 0; dev_info(&dev->intf->dev, "Resuming input extension\n"); /* * if suspend calls ir_raw_event_unregister(), the should call * ir_raw_event_register() */ if (ir) schedule_delayed_work(&ir->work, msecs_to_jiffies(ir->polling)); if (dev->num_button_polling_addresses) schedule_delayed_work(&dev->buttons_query_work, msecs_to_jiffies(dev->button_polling_interval)); return 0; } static struct em28xx_ops rc_ops = { .id = EM28XX_RC, .name = "Em28xx Input Extension", .init = em28xx_ir_init, .fini = em28xx_ir_fini, .suspend = em28xx_ir_suspend, .resume = em28xx_ir_resume, }; static int __init em28xx_rc_register(void) { return em28xx_register_extension(&rc_ops); } static void __exit em28xx_rc_unregister(void) { em28xx_unregister_extension(&rc_ops); } MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Mauro Carvalho Chehab"); MODULE_DESCRIPTION(DRIVER_DESC " - input interface"); MODULE_VERSION(EM28XX_VERSION); module_init(em28xx_rc_register); module_exit(em28xx_rc_unregister);
23 6 2 2 11 2 6 6 19 8 21 1 1 2 2 16 27 6 8 15 24 12 2 1 9 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2017 Facebook */ #include <linux/slab.h> #include <linux/bpf.h> #include <linux/btf.h> #include "map_in_map.h" struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) { struct bpf_map *inner_map, *inner_map_meta; u32 inner_map_meta_size; CLASS(fd, f)(inner_map_ufd); inner_map = __bpf_map_get(f); if (IS_ERR(inner_map)) return inner_map; /* Does not support >1 level map-in-map */ if (inner_map->inner_map_meta) return ERR_PTR(-EINVAL); if (!inner_map->ops->map_meta_equal) return ERR_PTR(-ENOTSUPP); inner_map_meta_size = sizeof(*inner_map_meta); /* In some cases verifier needs to access beyond just base map. */ if (inner_map->ops == &array_map_ops || inner_map->ops == &percpu_array_map_ops) inner_map_meta_size = sizeof(struct bpf_array); inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER); if (!inner_map_meta) return ERR_PTR(-ENOMEM); inner_map_meta->map_type = inner_map->map_type; inner_map_meta->key_size = inner_map->key_size; inner_map_meta->value_size = inner_map->value_size; inner_map_meta->map_flags = inner_map->map_flags; inner_map_meta->max_entries = inner_map->max_entries; inner_map_meta->record = btf_record_dup(inner_map->record); if (IS_ERR(inner_map_meta->record)) { /* btf_record_dup returns NULL or valid pointer in case of * invalid/empty/valid, but ERR_PTR in case of errors. During * equality NULL or IS_ERR is equivalent. */ struct bpf_map *ret = ERR_CAST(inner_map_meta->record); kfree(inner_map_meta); return ret; } /* Note: We must use the same BTF, as we also used btf_record_dup above * which relies on BTF being same for both maps, as some members like * record->fields.list_head have pointers like value_rec pointing into * inner_map->btf. */ if (inner_map->btf) { btf_get(inner_map->btf); inner_map_meta->btf = inner_map->btf; } /* Misc members not needed in bpf_map_meta_equal() check. */ inner_map_meta->ops = inner_map->ops; if (inner_map->ops == &array_map_ops || inner_map->ops == &percpu_array_map_ops) { struct bpf_array *inner_array_meta = container_of(inner_map_meta, struct bpf_array, map); struct bpf_array *inner_array = container_of(inner_map, struct bpf_array, map); inner_array_meta->index_mask = inner_array->index_mask; inner_array_meta->elem_size = inner_array->elem_size; inner_map_meta->bypass_spec_v1 = inner_map->bypass_spec_v1; } return inner_map_meta; } void bpf_map_meta_free(struct bpf_map *map_meta) { bpf_map_free_record(map_meta); btf_put(map_meta->btf); kfree(map_meta); } bool bpf_map_meta_equal(const struct bpf_map *meta0, const struct bpf_map *meta1) { /* No need to compare ops because it is covered by map_type */ return meta0->map_type == meta1->map_type && meta0->key_size == meta1->key_size && meta0->value_size == meta1->value_size && meta0->map_flags == meta1->map_flags && btf_record_equal(meta0->record, meta1->record); } void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file /* not used */, int ufd) { struct bpf_map *inner_map, *inner_map_meta; CLASS(fd, f)(ufd); inner_map = __bpf_map_get(f); if (IS_ERR(inner_map)) return inner_map; inner_map_meta = map->inner_map_meta; if (inner_map_meta->ops->map_meta_equal(inner_map_meta, inner_map)) bpf_map_inc(inner_map); else inner_map = ERR_PTR(-EINVAL); return inner_map; } void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) { struct bpf_map *inner_map = ptr; /* Defer the freeing of inner map according to the sleepable attribute * of bpf program which owns the outer map, so unnecessary waiting for * RCU tasks trace grace period can be avoided. */ if (need_defer) { if (atomic64_read(&map->sleepable_refcnt)) WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true); else WRITE_ONCE(inner_map->free_after_rcu_gp, true); } bpf_map_put(inner_map); } u32 bpf_map_fd_sys_lookup_elem(void *ptr) { return ((struct bpf_map *)ptr)->id; }
40 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_SHARED_IO_H #define _ASM_X86_SHARED_IO_H #include <linux/types.h> #define BUILDIO(bwl, bw, type) \ static __always_inline void __out##bwl(type value, u16 port) \ { \ asm volatile("out" #bwl " %" #bw "0, %w1" \ : : "a"(value), "Nd"(port)); \ } \ \ static __always_inline type __in##bwl(u16 port) \ { \ type value; \ asm volatile("in" #bwl " %w1, %" #bw "0" \ : "=a"(value) : "Nd"(port)); \ return value; \ } BUILDIO(b, b, u8) BUILDIO(w, w, u16) BUILDIO(l, , u32) #undef BUILDIO #define inb __inb #define inw __inw #define inl __inl #define outb __outb #define outw __outw #define outl __outl #endif
18 8 6 1 4 1 2 3 3 6 6 6 18 2 2 8 12 26 12 8 8 4 6 6 6 2 1 1 3 17 1 2 1 26 26 26 26 8 16 3 1 3 2 1 1 1 1 1 12 1 7 11 6 6 5 10 9 10 40 40 23 23 230 231 18 18 15 4 3 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/list.h> #include <linux/rbtree.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> struct nft_rbtree { struct rb_root root; rwlock_t lock; seqcount_rwlock_t count; unsigned long last_gc; }; struct nft_rbtree_elem { struct nft_elem_priv priv; struct rb_node node; struct nft_set_ext ext; }; static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe) { return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END); } static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe) { return !nft_rbtree_interval_end(rbe); } static int nft_rbtree_cmp(const struct nft_set *set, const struct nft_rbtree_elem *e1, const struct nft_rbtree_elem *e2) { return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext), set->klen); } static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe) { return nft_set_elem_expired(&rbe->ext); } static const struct nft_set_ext * __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, const u32 *key, unsigned int seq) { struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; u8 genmask = nft_genmask_cur(net); const struct rb_node *parent; int d; parent = rcu_dereference_raw(priv->root.rb_node); while (parent != NULL) { if (read_seqcount_retry(&priv->count, seq)) return NULL; rbe = rb_entry(parent, struct nft_rbtree_elem, node); d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); if (d < 0) { parent = rcu_dereference_raw(parent->rb_left); if (interval && !nft_rbtree_cmp(set, rbe, interval) && nft_rbtree_interval_end(rbe) && nft_rbtree_interval_start(interval)) continue; if (nft_set_elem_active(&rbe->ext, genmask) && !nft_rbtree_elem_expired(rbe)) interval = rbe; } else if (d > 0) parent = rcu_dereference_raw(parent->rb_right); else { if (!nft_set_elem_active(&rbe->ext, genmask)) { parent = rcu_dereference_raw(parent->rb_left); continue; } if (nft_rbtree_elem_expired(rbe)) return NULL; if (nft_rbtree_interval_end(rbe)) { if (nft_set_is_anonymous(set)) return NULL; parent = rcu_dereference_raw(parent->rb_left); interval = NULL; continue; } return &rbe->ext; } } if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_rbtree_interval_start(interval)) return &interval->ext; return NULL; } INDIRECT_CALLABLE_SCOPE const struct nft_set_ext * nft_rbtree_lookup(const struct net *net, const struct nft_set *set, const u32 *key) { struct nft_rbtree *priv = nft_set_priv(set); unsigned int seq = read_seqcount_begin(&priv->count); const struct nft_set_ext *ext; ext = __nft_rbtree_lookup(net, set, key, seq); if (ext || !read_seqcount_retry(&priv->count, seq)) return ext; read_lock_bh(&priv->lock); seq = read_seqcount_begin(&priv->count); ext = __nft_rbtree_lookup(net, set, key, seq); read_unlock_bh(&priv->lock); return ext; } static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, const u32 *key, struct nft_rbtree_elem **elem, unsigned int seq, unsigned int flags, u8 genmask) { struct nft_rbtree_elem *rbe, *interval = NULL; struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent; const void *this; int d; parent = rcu_dereference_raw(priv->root.rb_node); while (parent != NULL) { if (read_seqcount_retry(&priv->count, seq)) return false; rbe = rb_entry(parent, struct nft_rbtree_elem, node); this = nft_set_ext_key(&rbe->ext); d = memcmp(this, key, set->klen); if (d < 0) { parent = rcu_dereference_raw(parent->rb_left); if (!(flags & NFT_SET_ELEM_INTERVAL_END)) interval = rbe; } else if (d > 0) { parent = rcu_dereference_raw(parent->rb_right); if (flags & NFT_SET_ELEM_INTERVAL_END) interval = rbe; } else { if (!nft_set_elem_active(&rbe->ext, genmask)) { parent = rcu_dereference_raw(parent->rb_left); continue; } if (nft_set_elem_expired(&rbe->ext)) return false; if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) || (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) == (flags & NFT_SET_ELEM_INTERVAL_END)) { *elem = rbe; return true; } if (nft_rbtree_interval_end(rbe)) interval = NULL; parent = rcu_dereference_raw(parent->rb_left); } } if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && !nft_set_elem_expired(&interval->ext) && ((!nft_rbtree_interval_end(interval) && !(flags & NFT_SET_ELEM_INTERVAL_END)) || (nft_rbtree_interval_end(interval) && (flags & NFT_SET_ELEM_INTERVAL_END)))) { *elem = interval; return true; } return false; } static struct nft_elem_priv * nft_rbtree_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { struct nft_rbtree *priv = nft_set_priv(set); unsigned int seq = read_seqcount_begin(&priv->count); struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT); const u32 *key = (const u32 *)&elem->key.val; u8 genmask = nft_genmask_cur(net); bool ret; ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); if (ret || !read_seqcount_retry(&priv->count, seq)) return &rbe->priv; read_lock_bh(&priv->lock); seq = read_seqcount_begin(&priv->count); ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); read_unlock_bh(&priv->lock); if (!ret) return ERR_PTR(-ENOENT); return &rbe->priv; } static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set, struct nft_rbtree *priv, struct nft_rbtree_elem *rbe) { lockdep_assert_held_write(&priv->lock); nft_setelem_data_deactivate(net, set, &rbe->priv); rb_erase(&rbe->node, &priv->root); } static const struct nft_rbtree_elem * nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, struct nft_rbtree_elem *rbe) { struct nft_set *set = (struct nft_set *)__set; struct rb_node *prev = rb_prev(&rbe->node); struct net *net = read_pnet(&set->net); struct nft_rbtree_elem *rbe_prev; struct nft_trans_gc *gc; gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC); if (!gc) return ERR_PTR(-ENOMEM); /* search for end interval coming before this element. * end intervals don't carry a timeout extension, they * are coupled with the interval start element. */ while (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); if (nft_rbtree_interval_end(rbe_prev) && nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY)) break; prev = rb_prev(prev); } rbe_prev = NULL; if (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); nft_rbtree_gc_elem_remove(net, set, priv, rbe_prev); /* There is always room in this trans gc for this element, * memory allocation never actually happens, hence, the warning * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT, * this is synchronous gc which never fails. */ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); if (WARN_ON_ONCE(!gc)) return ERR_PTR(-ENOMEM); nft_trans_gc_elem_add(gc, rbe_prev); } nft_rbtree_gc_elem_remove(net, set, priv, rbe); gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); if (WARN_ON_ONCE(!gc)) return ERR_PTR(-ENOMEM); nft_trans_gc_elem_add(gc, rbe); nft_trans_gc_queue_sync_done(gc); return rbe_prev; } static bool nft_rbtree_update_first(const struct nft_set *set, struct nft_rbtree_elem *rbe, struct rb_node *first) { struct nft_rbtree_elem *first_elem; first_elem = rb_entry(first, struct nft_rbtree_elem, node); /* this element is closest to where the new element is to be inserted: * update the first element for the node list path. */ if (nft_rbtree_cmp(set, rbe, first_elem) < 0) return true; return false; } static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, struct nft_elem_priv **elem_priv) { struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; struct rb_node *node, *next, *parent, **p, *first = NULL; struct nft_rbtree *priv = nft_set_priv(set); u8 cur_genmask = nft_genmask_cur(net); u8 genmask = nft_genmask_next(net); u64 tstamp = nft_net_tstamp(net); int d; /* Descend the tree to search for an existing element greater than the * key value to insert that is greater than the new element. This is the * first element to walk the ordered elements to find possible overlap. */ parent = NULL; p = &priv->root.rb_node; while (*p != NULL) { parent = *p; rbe = rb_entry(parent, struct nft_rbtree_elem, node); d = nft_rbtree_cmp(set, rbe, new); if (d < 0) { p = &parent->rb_left; } else if (d > 0) { if (!first || nft_rbtree_update_first(set, rbe, first)) first = &rbe->node; p = &parent->rb_right; } else { if (nft_rbtree_interval_end(rbe)) p = &parent->rb_left; else p = &parent->rb_right; } } if (!first) first = rb_first(&priv->root); /* Detect overlap by going through the list of valid tree nodes. * Values stored in the tree are in reversed order, starting from * highest to lowest value. */ for (node = first; node != NULL; node = next) { next = rb_next(node); rbe = rb_entry(node, struct nft_rbtree_elem, node); if (!nft_set_elem_active(&rbe->ext, genmask)) continue; /* perform garbage collection to avoid bogus overlap reports * but skip new elements in this transaction. */ if (__nft_set_elem_expired(&rbe->ext, tstamp) && nft_set_elem_active(&rbe->ext, cur_genmask)) { const struct nft_rbtree_elem *removed_end; removed_end = nft_rbtree_gc_elem(set, priv, rbe); if (IS_ERR(removed_end)) return PTR_ERR(removed_end); if (removed_end == rbe_le || removed_end == rbe_ge) return -EAGAIN; continue; } d = nft_rbtree_cmp(set, rbe, new); if (d == 0) { /* Matching end element: no need to look for an * overlapping greater or equal element. */ if (nft_rbtree_interval_end(rbe)) { rbe_le = rbe; break; } /* first element that is greater or equal to key value. */ if (!rbe_ge) { rbe_ge = rbe; continue; } /* this is a closer more or equal element, update it. */ if (nft_rbtree_cmp(set, rbe_ge, new) != 0) { rbe_ge = rbe; continue; } /* element is equal to key value, make sure flags are * the same, an existing more or equal start element * must not be replaced by more or equal end element. */ if ((nft_rbtree_interval_start(new) && nft_rbtree_interval_start(rbe_ge)) || (nft_rbtree_interval_end(new) && nft_rbtree_interval_end(rbe_ge))) { rbe_ge = rbe; continue; } } else if (d > 0) { /* annotate element greater than the new element. */ rbe_ge = rbe; continue; } else if (d < 0) { /* annotate element less than the new element. */ rbe_le = rbe; break; } } /* - new start element matching existing start element: full overlap * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. */ if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { *elem_priv = &rbe_ge->priv; return -EEXIST; } /* - new end element matching existing end element: full overlap * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. */ if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) && nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) { *elem_priv = &rbe_le->priv; return -EEXIST; } /* - new start element with existing closest, less or equal key value * being a start element: partial overlap, reported as -ENOTEMPTY. * Anonymous sets allow for two consecutive start element since they * are constant, skip them to avoid bogus overlap reports. */ if (!nft_set_is_anonymous(set) && rbe_le && nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new)) return -ENOTEMPTY; /* - new end element with existing closest, less or equal key value * being a end element: partial overlap, reported as -ENOTEMPTY. */ if (rbe_le && nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new)) return -ENOTEMPTY; /* - new end element with existing closest, greater or equal key value * being an end element: partial overlap, reported as -ENOTEMPTY */ if (rbe_ge && nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new)) return -ENOTEMPTY; /* Accepted element: pick insertion point depending on key value */ parent = NULL; p = &priv->root.rb_node; while (*p != NULL) { parent = *p; rbe = rb_entry(parent, struct nft_rbtree_elem, node); d = nft_rbtree_cmp(set, rbe, new); if (d < 0) p = &parent->rb_left; else if (d > 0) p = &parent->rb_right; else if (nft_rbtree_interval_end(rbe)) p = &parent->rb_left; else p = &parent->rb_right; } rb_link_node_rcu(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); return 0; } static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_elem_priv **elem_priv) { struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); struct nft_rbtree *priv = nft_set_priv(set); int err; do { if (fatal_signal_pending(current)) return -EINTR; cond_resched(); write_lock_bh(&priv->lock); write_seqcount_begin(&priv->count); err = __nft_rbtree_insert(net, set, rbe, elem_priv); write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); } while (err == -EAGAIN); return err; } static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe) { write_lock_bh(&priv->lock); write_seqcount_begin(&priv->count); rb_erase(&rbe->node, &priv->root); write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); } static void nft_rbtree_remove(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); struct nft_rbtree *priv = nft_set_priv(set); nft_rbtree_erase(priv, rbe); } static void nft_rbtree_activate(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); nft_clear(net, &rbe->ext); } static void nft_rbtree_flush(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &rbe->ext); } static struct nft_elem_priv * nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv); const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; u8 genmask = nft_genmask_next(net); u64 tstamp = nft_net_tstamp(net); int d; while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val, set->klen); if (d < 0) parent = parent->rb_left; else if (d > 0) parent = parent->rb_right; else { if (nft_rbtree_interval_end(rbe) && nft_rbtree_interval_start(this)) { parent = parent->rb_left; continue; } else if (nft_rbtree_interval_start(rbe) && nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) { break; } else if (!nft_set_elem_active(&rbe->ext, genmask)) { parent = parent->rb_left; continue; } nft_rbtree_flush(net, set, &rbe->priv); return &rbe->priv; } } return NULL; } static void nft_rbtree_do_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; struct rb_node *node; for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { rbe = rb_entry(node, struct nft_rbtree_elem, node); if (iter->count < iter->skip) goto cont; iter->err = iter->fn(ctx, set, iter, &rbe->priv); if (iter->err < 0) return; cont: iter->count++; } } static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { struct nft_rbtree *priv = nft_set_priv(set); switch (iter->type) { case NFT_ITER_UPDATE: lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex); nft_rbtree_do_walk(ctx, set, iter); break; case NFT_ITER_READ: read_lock_bh(&priv->lock); nft_rbtree_do_walk(ctx, set, iter); read_unlock_bh(&priv->lock); break; default: iter->err = -EINVAL; WARN_ON_ONCE(1); break; } } static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, struct nft_rbtree *priv, struct nft_rbtree_elem *rbe) { nft_setelem_data_deactivate(net, set, &rbe->priv); nft_rbtree_erase(priv, rbe); } static void nft_rbtree_gc(struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe, *rbe_end = NULL; struct net *net = read_pnet(&set->net); u64 tstamp = nft_net_tstamp(net); struct rb_node *node, *next; struct nft_trans_gc *gc; set = nft_set_container_of(priv); net = read_pnet(&set->net); gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); if (!gc) return; for (node = rb_first(&priv->root); node ; node = next) { next = rb_next(node); rbe = rb_entry(node, struct nft_rbtree_elem, node); /* elements are reversed in the rbtree for historical reasons, * from highest to lowest value, that is why end element is * always visited before the start element. */ if (nft_rbtree_interval_end(rbe)) { rbe_end = rbe; continue; } if (!__nft_set_elem_expired(&rbe->ext, tstamp)) continue; gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); if (!gc) goto try_later; /* end element needs to be removed first, it has * no timeout extension. */ if (rbe_end) { nft_rbtree_gc_remove(net, set, priv, rbe_end); nft_trans_gc_elem_add(gc, rbe_end); rbe_end = NULL; } gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); if (!gc) goto try_later; nft_rbtree_gc_remove(net, set, priv, rbe); nft_trans_gc_elem_add(gc, rbe); } try_later: if (gc) { gc = nft_trans_gc_catchall_sync(gc); nft_trans_gc_queue_sync_done(gc); priv->last_gc = jiffies; } } static u64 nft_rbtree_privsize(const struct nlattr * const nla[], const struct nft_set_desc *desc) { return sizeof(struct nft_rbtree); } static int nft_rbtree_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const nla[]) { struct nft_rbtree *priv = nft_set_priv(set); BUILD_BUG_ON(offsetof(struct nft_rbtree_elem, priv) != 0); rwlock_init(&priv->lock); seqcount_rwlock_init(&priv->count, &priv->lock); priv->root = RB_ROOT; return 0; } static void nft_rbtree_destroy(const struct nft_ctx *ctx, const struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; struct rb_node *node; while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); nf_tables_set_elem_destroy(ctx, set, &rbe->priv); } } static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est) { if (desc->field_count > 1) return false; if (desc->size) est->size = sizeof(struct nft_rbtree) + desc->size * sizeof(struct nft_rbtree_elem); else est->size = ~0; est->lookup = NFT_SET_CLASS_O_LOG_N; est->space = NFT_SET_CLASS_O_N; return true; } static void nft_rbtree_commit(struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set))) nft_rbtree_gc(set); } static void nft_rbtree_gc_init(const struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); priv->last_gc = jiffies; } /* rbtree stores ranges as singleton elements, each range is composed of two * elements ... */ static u32 nft_rbtree_ksize(u32 size) { return size * 2; } /* ... hide this detail to userspace. */ static u32 nft_rbtree_usize(u32 size) { if (!size) return 0; return size / 2; } static u32 nft_rbtree_adjust_maxsize(const struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; struct rb_node *node; const void *key; node = rb_last(&priv->root); if (!node) return 0; rbe = rb_entry(node, struct nft_rbtree_elem, node); if (!nft_rbtree_interval_end(rbe)) return 0; key = nft_set_ext_key(&rbe->ext); if (memchr(key, 1, set->klen)) return 0; /* this is the all-zero no-match element. */ return 1; } const struct nft_set_type nft_set_rbtree_type = { .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, .ops = { .privsize = nft_rbtree_privsize, .elemsize = offsetof(struct nft_rbtree_elem, ext), .estimate = nft_rbtree_estimate, .init = nft_rbtree_init, .destroy = nft_rbtree_destroy, .insert = nft_rbtree_insert, .remove = nft_rbtree_remove, .deactivate = nft_rbtree_deactivate, .flush = nft_rbtree_flush, .activate = nft_rbtree_activate, .commit = nft_rbtree_commit, .gc_init = nft_rbtree_gc_init, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, .get = nft_rbtree_get, .ksize = nft_rbtree_ksize, .usize = nft_rbtree_usize, .adjust_maxsize = nft_rbtree_adjust_maxsize, }, };
10 10 2 2 2 5 3 2 3 3 2 2 2 30 2 2 8 17 1 16 15 1 24 1 23 3 1 15 27 3 20 3 1 4 16 15 1 14 1 8 4 4 6 3 3 2 2 2 2 2 8 8 8 5 5 4 5 5 5 5 7 2 5 5 5 2 1 2 3 3 9 9 3 4 1 1 13 1 1 3 3 5 2 2 5 5 3 3 5 5 2 2 50 50 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2007-2014 Nicira, Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/init.h> #include <linux/module.h> #include <linux/if_arp.h> #include <linux/if_vlan.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/jhash.h> #include <linux/delay.h> #include <linux/time.h> #include <linux/etherdevice.h> #include <linux/kernel.h> #include <linux/kthread.h> #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/ethtool.h> #include <linux/wait.h> #include <asm/div64.h> #include <linux/highmem.h> #include <linux/netfilter_bridge.h> #include <linux/netfilter_ipv4.h> #include <linux/inetdevice.h> #include <linux/list.h> #include <linux/openvswitch.h> #include <linux/rculist.h> #include <linux/dmi.h> #include <net/genetlink.h> #include <net/gso.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/pkt_cls.h> #include "datapath.h" #include "drop.h" #include "flow.h" #include "flow_table.h" #include "flow_netlink.h" #include "meter.h" #include "openvswitch_trace.h" #include "vport-internal_dev.h" #include "vport-netdev.h" unsigned int ovs_net_id __read_mostly; static struct genl_family dp_packet_genl_family; static struct genl_family dp_flow_genl_family; static struct genl_family dp_datapath_genl_family; static const struct nla_policy flow_policy[]; static const struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP, }; static const struct genl_multicast_group ovs_dp_datapath_multicast_group = { .name = OVS_DATAPATH_MCGROUP, }; static const struct genl_multicast_group ovs_dp_vport_multicast_group = { .name = OVS_VPORT_MCGROUP, }; /* Check if need to build a reply message. * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */ static bool ovs_must_notify(struct genl_family *family, struct genl_info *info, unsigned int group) { return info->nlhdr->nlmsg_flags & NLM_F_ECHO || genl_has_listeners(family, genl_info_net(info), group); } static void ovs_notify(struct genl_family *family, struct sk_buff *skb, struct genl_info *info) { genl_notify(family, skb, info, 0, GFP_KERNEL); } /** * DOC: Locking: * * All writes e.g. Writes to device state (add/remove datapath, port, set * operations on vports, etc.), Writes to other state (flow table * modifications, set miscellaneous datapath parameters, etc.) are protected * by ovs_lock. * * Reads are protected by RCU. * * There are a few special cases (mostly stats) that have their own * synchronization but they nest under all of above and don't interact with * each other. * * The RTNL lock nests inside ovs_mutex. */ static DEFINE_MUTEX(ovs_mutex); void ovs_lock(void) { mutex_lock(&ovs_mutex); } void ovs_unlock(void) { mutex_unlock(&ovs_mutex); } #ifdef CONFIG_LOCKDEP int lockdep_ovsl_is_held(void) { if (debug_locks) return lockdep_is_held(&ovs_mutex); else return 1; } #endif static struct vport *new_vport(const struct vport_parms *); static int queue_gso_packets(struct datapath *dp, struct sk_buff *, const struct sw_flow_key *, const struct dp_upcall_info *, uint32_t cutlen); static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, const struct sw_flow_key *, const struct dp_upcall_info *, uint32_t cutlen); static void ovs_dp_masks_rebalance(struct work_struct *work); static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *); /* Must be called with rcu_read_lock or ovs_mutex. */ const char *ovs_dp_name(const struct datapath *dp) { struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL); return ovs_vport_name(vport); } static int get_dpifindex(const struct datapath *dp) { struct vport *local; int ifindex; rcu_read_lock(); local = ovs_vport_rcu(dp, OVSP_LOCAL); if (local) ifindex = local->dev->ifindex; else ifindex = 0; rcu_read_unlock(); return ifindex; } static void destroy_dp_rcu(struct rcu_head *rcu) { struct datapath *dp = container_of(rcu, struct datapath, rcu); ovs_flow_tbl_destroy(&dp->table); free_percpu(dp->stats_percpu); kfree(dp->ports); ovs_meters_exit(dp); kfree(rcu_dereference_raw(dp->upcall_portids)); kfree(dp); } static struct hlist_head *vport_hash_bucket(const struct datapath *dp, u16 port_no) { return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; } /* Called with ovs_mutex or RCU read lock. */ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) { struct vport *vport; struct hlist_head *head; head = vport_hash_bucket(dp, port_no); hlist_for_each_entry_rcu(vport, head, dp_hash_node, lockdep_ovsl_is_held()) { if (vport->port_no == port_no) return vport; } return NULL; } /* Called with ovs_mutex. */ static struct vport *new_vport(const struct vport_parms *parms) { struct vport *vport; vport = ovs_vport_add(parms); if (!IS_ERR(vport)) { struct datapath *dp = parms->dp; struct hlist_head *head = vport_hash_bucket(dp, vport->port_no); hlist_add_head_rcu(&vport->dp_hash_node, head); } return vport; } static void ovs_vport_update_upcall_stats(struct sk_buff *skb, const struct dp_upcall_info *upcall_info, bool upcall_result) { struct vport *p = OVS_CB(skb)->input_vport; struct vport_upcall_stats_percpu *stats; if (upcall_info->cmd != OVS_PACKET_CMD_MISS && upcall_info->cmd != OVS_PACKET_CMD_ACTION) return; stats = this_cpu_ptr(p->upcall_stats); u64_stats_update_begin(&stats->syncp); if (upcall_result) u64_stats_inc(&stats->n_success); else u64_stats_inc(&stats->n_fail); u64_stats_update_end(&stats->syncp); } void ovs_dp_detach_port(struct vport *p) { ASSERT_OVSL(); /* First drop references to device. */ hlist_del_rcu(&p->dp_hash_node); /* Then destroy it. */ ovs_vport_del(p); } /* Must be called with rcu_read_lock. */ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) { struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage); const struct vport *p = OVS_CB(skb)->input_vport; struct datapath *dp = p->dp; struct sw_flow *flow; struct sw_flow_actions *sf_acts; struct dp_stats_percpu *stats; bool ovs_pcpu_locked = false; u64 *stats_counter; u32 n_mask_hit; u32 n_cache_hit; int error; stats = this_cpu_ptr(dp->stats_percpu); /* Look up flow. */ flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb), &n_mask_hit, &n_cache_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; if (OVS_CB(skb)->upcall_pid) upcall.portid = OVS_CB(skb)->upcall_pid; else if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id()); else upcall.portid = ovs_vport_find_upcall_portid(p, skb); upcall.mru = OVS_CB(skb)->mru; error = ovs_dp_upcall(dp, skb, key, &upcall, 0); switch (error) { case 0: case -EAGAIN: case -ERESTARTSYS: case -EINTR: consume_skb(skb); break; default: kfree_skb(skb); break; } stats_counter = &stats->n_missed; goto out; } ovs_flow_stats_update(flow, key->tp.flags, skb); sf_acts = rcu_dereference(flow->sf_acts); /* This path can be invoked recursively: Use the current task to * identify recursive invocation - the lock must be acquired only once. * Even with disabled bottom halves this can be preempted on PREEMPT_RT. * Limit the locking to RT to avoid assigning `owner' if it can be * avoided. */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) { local_lock_nested_bh(&ovs_pcpu_storage->bh_lock); ovs_pcpu->owner = current; ovs_pcpu_locked = true; } error = ovs_execute_actions(dp, skb, sf_acts, key); if (unlikely(error)) net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n", ovs_dp_name(dp), error); if (ovs_pcpu_locked) { ovs_pcpu->owner = NULL; local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock); } stats_counter = &stats->n_hit; out: /* Update datapath statistics. */ u64_stats_update_begin(&stats->syncp); (*stats_counter)++; stats->n_mask_hit += n_mask_hit; stats->n_cache_hit += n_cache_hit; u64_stats_update_end(&stats->syncp); } int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_key *key, const struct dp_upcall_info *upcall_info, uint32_t cutlen) { struct dp_stats_percpu *stats; int err; if (trace_ovs_dp_upcall_enabled()) trace_ovs_dp_upcall(dp, skb, key, upcall_info); if (upcall_info->portid == 0) { err = -ENOTCONN; goto err; } if (!skb_is_gso(skb)) err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); else err = queue_gso_packets(dp, skb, key, upcall_info, cutlen); ovs_vport_update_upcall_stats(skb, upcall_info, !err); if (err) goto err; return 0; err: stats = this_cpu_ptr(dp->stats_percpu); u64_stats_update_begin(&stats->syncp); stats->n_lost++; u64_stats_update_end(&stats->syncp); return err; } static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_key *key, const struct dp_upcall_info *upcall_info, uint32_t cutlen) { unsigned int gso_type = skb_shinfo(skb)->gso_type; struct sw_flow_key later_key; struct sk_buff *segs, *nskb; int err; BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET); segs = __skb_gso_segment(skb, NETIF_F_SG, false); if (IS_ERR(segs)) return PTR_ERR(segs); if (segs == NULL) return -EINVAL; if (gso_type & SKB_GSO_UDP) { /* The initial flow key extracted by ovs_flow_key_extract() * in this case is for a first fragment, so we need to * properly mark later fragments. */ later_key = *key; later_key.ip.frag = OVS_FRAG_TYPE_LATER; } /* Queue all of the segments. */ skb_list_walk_safe(segs, skb, nskb) { if (gso_type & SKB_GSO_UDP && skb != segs) key = &later_key; err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); if (err) break; } /* Free all of the segments. */ skb_list_walk_safe(segs, skb, nskb) { if (err) kfree_skb(skb); else consume_skb(skb); } return err; } static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, unsigned int hdrlen, int actions_attrlen) { size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */ + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */ + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */ /* OVS_PACKET_ATTR_USERDATA */ if (upcall_info->userdata) size += NLA_ALIGN(upcall_info->userdata->nla_len); /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */ if (upcall_info->egress_tun_info) size += nla_total_size(ovs_tun_key_attr_size()); /* OVS_PACKET_ATTR_ACTIONS */ if (upcall_info->actions_len) size += nla_total_size(actions_attrlen); /* OVS_PACKET_ATTR_MRU */ if (upcall_info->mru) size += nla_total_size(sizeof(upcall_info->mru)); return size; } static void pad_packet(struct datapath *dp, struct sk_buff *skb) { if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { size_t plen = NLA_ALIGN(skb->len) - skb->len; if (plen > 0) skb_put_zero(skb, plen); } } static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_key *key, const struct dp_upcall_info *upcall_info, uint32_t cutlen) { struct ovs_header *upcall; struct sk_buff *nskb = NULL; struct sk_buff *user_skb = NULL; /* to be queued to userspace */ struct nlattr *nla; size_t len; unsigned int hlen; int err, dp_ifindex; u64 hash; dp_ifindex = get_dpifindex(dp); if (!dp_ifindex) return -ENODEV; if (skb_vlan_tag_present(skb)) { nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return -ENOMEM; nskb = __vlan_hwaccel_push_inside(nskb); if (!nskb) return -ENOMEM; skb = nskb; } if (nla_attr_size(skb->len) > USHRT_MAX) { err = -EFBIG; goto out; } /* Complete checksum if needed */ if (skb->ip_summed == CHECKSUM_PARTIAL && (err = skb_csum_hwoffload_help(skb, 0))) goto out; /* Older versions of OVS user space enforce alignment of the last * Netlink attribute to NLA_ALIGNTO which would require extensive * padding logic. Only perform zerocopy if padding is not required. */ if (dp->user_features & OVS_DP_F_UNALIGNED) hlen = skb_zerocopy_headlen(skb); else hlen = skb->len; len = upcall_msg_size(upcall_info, hlen - cutlen, OVS_CB(skb)->acts_origlen); user_skb = genlmsg_new(len, GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; goto out; } upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd); if (!upcall) { err = -EINVAL; goto out; } upcall->dp_ifindex = dp_ifindex; err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); if (err) goto out; if (upcall_info->userdata) __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, nla_len(upcall_info->userdata), nla_data(upcall_info->userdata)); if (upcall_info->egress_tun_info) { nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); if (!nla) { err = -EMSGSIZE; goto out; } err = ovs_nla_put_tunnel_info(user_skb, upcall_info->egress_tun_info); if (err) goto out; nla_nest_end(user_skb, nla); } if (upcall_info->actions_len) { nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS); if (!nla) { err = -EMSGSIZE; goto out; } err = ovs_nla_put_actions(upcall_info->actions, upcall_info->actions_len, user_skb); if (!err) nla_nest_end(user_skb, nla); else nla_nest_cancel(user_skb, nla); } /* Add OVS_PACKET_ATTR_MRU */ if (upcall_info->mru && nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) { err = -ENOBUFS; goto out; } /* Add OVS_PACKET_ATTR_LEN when packet is truncated */ if (cutlen > 0 && nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) { err = -ENOBUFS; goto out; } /* Add OVS_PACKET_ATTR_HASH */ hash = skb_get_hash_raw(skb); if (skb->sw_hash) hash |= OVS_PACKET_HASH_SW_BIT; if (skb->l4_hash) hash |= OVS_PACKET_HASH_L4_BIT; if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) { err = -ENOBUFS; goto out; } /* Only reserve room for attribute header, packet data is added * in skb_zerocopy() */ if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { err = -ENOBUFS; goto out; } nla->nla_len = nla_attr_size(skb->len - cutlen); err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen); if (err) goto out; /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ pad_packet(dp, user_skb); ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); user_skb = NULL; out: if (err) skb_tx_error(skb); consume_skb(user_skb); consume_skb(nskb); return err; } static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) { struct ovs_header *ovs_header = genl_info_userhdr(info); struct net *net = sock_net(skb->sk); struct nlattr **a = info->attrs; struct sw_flow_actions *acts; struct sk_buff *packet; struct sw_flow *flow; struct sw_flow_actions *sf_acts; struct datapath *dp; struct vport *input_vport; u16 mru = 0; u64 hash; int len; int err; bool log = !a[OVS_PACKET_ATTR_PROBE]; err = -EINVAL; if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || !a[OVS_PACKET_ATTR_ACTIONS]) goto err; len = nla_len(a[OVS_PACKET_ATTR_PACKET]); packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL); err = -ENOMEM; if (!packet) goto err; skb_reserve(packet, NET_IP_ALIGN); nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len); /* Set packet's mru */ if (a[OVS_PACKET_ATTR_MRU]) { mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]); packet->ignore_df = 1; } OVS_CB(packet)->mru = mru; if (a[OVS_PACKET_ATTR_HASH]) { hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]); __skb_set_hash(packet, hash & 0xFFFFFFFFULL, !!(hash & OVS_PACKET_HASH_SW_BIT), !!(hash & OVS_PACKET_HASH_L4_BIT)); } OVS_CB(packet)->upcall_pid = nla_get_u32_default(a[OVS_PACKET_ATTR_UPCALL_PID], 0); /* Build an sw_flow for sending this packet. */ flow = ovs_flow_alloc(); err = PTR_ERR(flow); if (IS_ERR(flow)) goto err_kfree_skb; err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY], packet, &flow->key, log); if (err) goto err_flow_free; err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS], &flow->key, &acts, log); if (err) goto err_flow_free; rcu_assign_pointer(flow->sf_acts, acts); packet->priority = flow->key.phy.priority; packet->mark = flow->key.phy.skb_mark; rcu_read_lock(); dp = get_dp_rcu(net, ovs_header->dp_ifindex); err = -ENODEV; if (!dp) goto err_unlock; input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port); if (!input_vport) input_vport = ovs_vport_rcu(dp, OVSP_LOCAL); if (!input_vport) goto err_unlock; packet->dev = input_vport->dev; OVS_CB(packet)->input_vport = input_vport; sf_acts = rcu_dereference(flow->sf_acts); local_bh_disable(); local_lock_nested_bh(&ovs_pcpu_storage->bh_lock); if (IS_ENABLED(CONFIG_PREEMPT_RT)) this_cpu_write(ovs_pcpu_storage->owner, current); err = ovs_execute_actions(dp, packet, sf_acts, &flow->key); if (IS_ENABLED(CONFIG_PREEMPT_RT)) this_cpu_write(ovs_pcpu_storage->owner, NULL); local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock); local_bh_enable(); rcu_read_unlock(); ovs_flow_free(flow, false); return err; err_unlock: rcu_read_unlock(); err_flow_free: ovs_flow_free(flow, false); err_kfree_skb: kfree_skb(packet); err: return err; } static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN }, [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG }, [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 }, [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 }, [OVS_PACKET_ATTR_UPCALL_PID] = { .type = NLA_U32 }, }; static const struct genl_small_ops dp_packet_genl_ops[] = { { .cmd = OVS_PACKET_CMD_EXECUTE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_packet_cmd_execute } }; static struct genl_family dp_packet_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, .maxattr = OVS_PACKET_ATTR_MAX, .policy = packet_policy, .netnsok = true, .parallel_ops = true, .small_ops = dp_packet_genl_ops, .n_small_ops = ARRAY_SIZE(dp_packet_genl_ops), .resv_start_op = OVS_PACKET_CMD_EXECUTE + 1, .module = THIS_MODULE, }; static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, struct ovs_dp_megaflow_stats *mega_stats) { int i; memset(mega_stats, 0, sizeof(*mega_stats)); stats->n_flows = ovs_flow_tbl_count(&dp->table); mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table); stats->n_hit = stats->n_missed = stats->n_lost = 0; for_each_possible_cpu(i) { const struct dp_stats_percpu *percpu_stats; struct dp_stats_percpu local_stats; unsigned int start; percpu_stats = per_cpu_ptr(dp->stats_percpu, i); do { start = u64_stats_fetch_begin(&percpu_stats->syncp); local_stats = *percpu_stats; } while (u64_stats_fetch_retry(&percpu_stats->syncp, start)); stats->n_hit += local_stats.n_hit; stats->n_missed += local_stats.n_missed; stats->n_lost += local_stats.n_lost; mega_stats->n_mask_hit += local_stats.n_mask_hit; mega_stats->n_cache_hit += local_stats.n_cache_hit; } } static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags) { return ovs_identifier_is_ufid(sfid) && !(ufid_flags & OVS_UFID_F_OMIT_KEY); } static bool should_fill_mask(uint32_t ufid_flags) { return !(ufid_flags & OVS_UFID_F_OMIT_MASK); } static bool should_fill_actions(uint32_t ufid_flags) { return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS); } static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, const struct sw_flow_id *sfid, uint32_t ufid_flags) { size_t len = NLMSG_ALIGN(sizeof(struct ovs_header)); /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback * see ovs_nla_put_identifier() */ if (sfid && ovs_identifier_is_ufid(sfid)) len += nla_total_size(sfid->ufid_len); else len += nla_total_size(ovs_key_attr_size()); /* OVS_FLOW_ATTR_KEY */ if (!sfid || should_fill_key(sfid, ufid_flags)) len += nla_total_size(ovs_key_attr_size()); /* OVS_FLOW_ATTR_MASK */ if (should_fill_mask(ufid_flags)) len += nla_total_size(ovs_key_attr_size()); /* OVS_FLOW_ATTR_ACTIONS */ if (should_fill_actions(ufid_flags)) len += nla_total_size(acts->orig_len); return len + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */ } /* Called with ovs_mutex or RCU read lock. */ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow, struct sk_buff *skb) { struct ovs_flow_stats stats; __be16 tcp_flags; unsigned long used; ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); if (used && nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used), OVS_FLOW_ATTR_PAD)) return -EMSGSIZE; if (stats.n_packets && nla_put_64bit(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats, OVS_FLOW_ATTR_PAD)) return -EMSGSIZE; if ((u8)ntohs(tcp_flags) && nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags))) return -EMSGSIZE; return 0; } /* Called with ovs_mutex or RCU read lock. */ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, struct sk_buff *skb, int skb_orig_len) { struct nlattr *start; int err; /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if * this is the first flow to be dumped into 'skb'. This is unusual for * Netlink but individual action lists can be longer than * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this. * The userspace caller can always fetch the actions separately if it * really wants them. (Most userspace callers in fact don't care.) * * This can only fail for dump operations because the skb is always * properly sized for single flows. */ start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS); if (start) { const struct sw_flow_actions *sf_acts; sf_acts = rcu_dereference_ovsl(flow->sf_acts); err = ovs_nla_put_actions(sf_acts->actions, sf_acts->actions_len, skb); if (!err) nla_nest_end(skb, start); else { if (skb_orig_len) return err; nla_nest_cancel(skb, start); } } else if (skb_orig_len) { return -EMSGSIZE; } return 0; } /* Called with ovs_mutex or RCU read lock. */ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd, u32 ufid_flags) { const int skb_orig_len = skb->len; struct ovs_header *ovs_header; int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; ovs_header->dp_ifindex = dp_ifindex; err = ovs_nla_put_identifier(flow, skb); if (err) goto error; if (should_fill_key(&flow->id, ufid_flags)) { err = ovs_nla_put_masked_key(flow, skb); if (err) goto error; } if (should_fill_mask(ufid_flags)) { err = ovs_nla_put_mask(flow, skb); if (err) goto error; } err = ovs_flow_cmd_fill_stats(flow, skb); if (err) goto error; if (should_fill_actions(ufid_flags)) { err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); if (err) goto error; } genlmsg_end(skb, ovs_header); return 0; error: genlmsg_cancel(skb, ovs_header); return err; } /* May not be called with RCU read lock. */ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, const struct sw_flow_id *sfid, struct genl_info *info, bool always, uint32_t ufid_flags) { struct sk_buff *skb; size_t len; if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0)) return NULL; len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); skb = genlmsg_new(len, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); return skb; } /* Called with ovs_mutex. */ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, int dp_ifindex, struct genl_info *info, u8 cmd, bool always, u32 ufid_flags) { struct sk_buff *skb; int retval; skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), &flow->id, info, always, ufid_flags); if (IS_ERR_OR_NULL(skb)) return skb; retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, cmd, ufid_flags); if (WARN_ON_ONCE(retval < 0)) { kfree_skb(skb); skb = ERR_PTR(retval); } return skb; } static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); struct nlattr **a = info->attrs; struct ovs_header *ovs_header = genl_info_userhdr(info); struct sw_flow *flow = NULL, *new_flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; struct sw_flow_key *key; struct sw_flow_actions *acts; struct sw_flow_match match; u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error; bool log = !a[OVS_FLOW_ATTR_PROBE]; /* Must have key and actions. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) { OVS_NLERR(log, "Flow key attr not present in new flow."); goto error; } if (!a[OVS_FLOW_ATTR_ACTIONS]) { OVS_NLERR(log, "Flow actions attr not present in new flow."); goto error; } /* Most of the time we need to allocate a new flow, do it before * locking. */ new_flow = ovs_flow_alloc(); if (IS_ERR(new_flow)) { error = PTR_ERR(new_flow); goto error; } /* Extract key. */ key = kzalloc(sizeof(*key), GFP_KERNEL); if (!key) { error = -ENOMEM; goto err_kfree_flow; } ovs_match_init(&match, key, false, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_key; ovs_flow_mask_key(&new_flow->key, key, true, &mask); /* Extract flow identifier. */ error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], key, log); if (error) goto err_kfree_key; /* Validate actions. */ error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, &acts, log); if (error) { OVS_NLERR(log, "Flow actions may not be safe on all matching packets."); goto err_kfree_key; } reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false, ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; } ovs_lock(); dp = get_dp(net, ovs_header->dp_ifindex); if (unlikely(!dp)) { error = -ENODEV; goto err_unlock_ovs; } /* Check if this is a duplicate flow */ if (ovs_identifier_is_ufid(&new_flow->id)) flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); if (!flow) flow = ovs_flow_tbl_lookup(&dp->table, key); if (likely(!flow)) { rcu_assign_pointer(new_flow->sf_acts, acts); /* Put flow in bucket. */ error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask); if (unlikely(error)) { acts = NULL; goto err_unlock_ovs; } if (unlikely(reply)) { error = ovs_flow_cmd_fill_info(new_flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_NEW, ufid_flags); BUG_ON(error < 0); } ovs_unlock(); } else { struct sw_flow_actions *old_acts; /* Bail out if we're not allowed to modify an existing flow. * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL * because Generic Netlink treats the latter as a dump * request. We also accept NLM_F_EXCL in case that bug ever * gets fixed. */ if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))) { error = -EEXIST; goto err_unlock_ovs; } /* The flow identifier has to be the same for flow updates. * Look for any overlapping flow. */ if (unlikely(!ovs_flow_cmp(flow, &match))) { if (ovs_identifier_is_key(&flow->id)) flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); else /* UFID matches but key is different */ flow = NULL; if (!flow) { error = -ENOENT; goto err_unlock_ovs; } } /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); if (unlikely(reply)) { error = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_NEW, ufid_flags); BUG_ON(error < 0); } ovs_unlock(); ovs_nla_free_flow_actions_rcu(old_acts); ovs_flow_free(new_flow, false); } if (reply) ovs_notify(&dp_flow_genl_family, reply, info); kfree(key); return 0; err_unlock_ovs: ovs_unlock(); kfree_skb(reply); err_kfree_acts: ovs_nla_free_flow_actions(acts); err_kfree_key: kfree(key); err_kfree_flow: ovs_flow_free(new_flow, false); error: return error; } /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */ static noinline_for_stack struct sw_flow_actions *get_flow_actions(struct net *net, const struct nlattr *a, const struct sw_flow_key *key, const struct sw_flow_mask *mask, bool log) { struct sw_flow_actions *acts; struct sw_flow_key masked_key; int error; ovs_flow_mask_key(&masked_key, key, true, mask); error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log); if (error) { OVS_NLERR(log, "Actions may not be safe on all matching packets"); return ERR_PTR(error); } return acts; } /* Factor out match-init and action-copy to avoid * "Wframe-larger-than=1024" warning. Because mask is only * used to get actions, we new a function to save some * stack space. * * If there are not key and action attrs, we return 0 * directly. In the case, the caller will also not use the * match as before. If there is action attr, we try to get * actions and save them to *acts. Before returning from * the function, we reset the match->mask pointer. Because * we should not to return match object with dangling reference * to mask. * */ static noinline_for_stack int ovs_nla_init_match_and_action(struct net *net, struct sw_flow_match *match, struct sw_flow_key *key, struct nlattr **a, struct sw_flow_actions **acts, bool log) { struct sw_flow_mask mask; int error = 0; if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(match, key, true, &mask); error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto error; } if (a[OVS_FLOW_ATTR_ACTIONS]) { if (!a[OVS_FLOW_ATTR_KEY]) { OVS_NLERR(log, "Flow key attribute not present in set flow."); error = -EINVAL; goto error; } *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key, &mask, log); if (IS_ERR(*acts)) { error = PTR_ERR(*acts); goto error; } } /* On success, error is 0. */ error: match->mask = NULL; return error; } static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); struct nlattr **a = info->attrs; struct ovs_header *ovs_header = genl_info_userhdr(info); struct sw_flow_key key; struct sw_flow *flow; struct sk_buff *reply = NULL; struct datapath *dp; struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; struct sw_flow_id sfid; u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error = 0; bool log = !a[OVS_FLOW_ATTR_PROBE]; bool ufid_present; ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) { OVS_NLERR(log, "Flow set message rejected, Key attribute missing."); return -EINVAL; } error = ovs_nla_init_match_and_action(net, &match, &key, a, &acts, log); if (error) goto error; if (acts) { /* Can allocate before locking if have acts. */ reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false, ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; } } ovs_lock(); dp = get_dp(net, ovs_header->dp_ifindex); if (unlikely(!dp)) { error = -ENODEV; goto err_unlock_ovs; } /* Check that the flow exists. */ if (ufid_present) flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid); else flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { error = -ENOENT; goto err_unlock_ovs; } /* Update actions, if present. */ if (likely(acts)) { old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); if (unlikely(reply)) { error = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_SET, ufid_flags); BUG_ON(error < 0); } } else { /* Could not alloc without acts before locking. */ reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, OVS_FLOW_CMD_SET, false, ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_unlock_ovs; } } /* Clear stats. */ if (a[OVS_FLOW_ATTR_CLEAR]) ovs_flow_stats_clear(flow); ovs_unlock(); if (reply) ovs_notify(&dp_flow_genl_family, reply, info); if (old_acts) ovs_nla_free_flow_actions_rcu(old_acts); return 0; err_unlock_ovs: ovs_unlock(); kfree_skb(reply); err_kfree_acts: ovs_nla_free_flow_actions(acts); error: return error; } static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = genl_info_userhdr(info); struct net *net = sock_net(skb->sk); struct sw_flow_key key; struct sk_buff *reply; struct sw_flow *flow; struct datapath *dp; struct sw_flow_match match; struct sw_flow_id ufid; u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int err = 0; bool log = !a[OVS_FLOW_ATTR_PROBE]; bool ufid_present; ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(&match, &key, true, NULL); err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL, log); } else if (!ufid_present) { OVS_NLERR(log, "Flow get message rejected, Key attribute missing."); err = -EINVAL; } if (err) return err; ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) { err = -ENODEV; goto unlock; } if (ufid_present) flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); else flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (!flow) { err = -ENOENT; goto unlock; } reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, OVS_FLOW_CMD_GET, true, ufid_flags); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; } ovs_unlock(); return genlmsg_reply(reply, info); unlock: ovs_unlock(); return err; } static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = genl_info_userhdr(info); struct net *net = sock_net(skb->sk); struct sw_flow_key key; struct sk_buff *reply; struct sw_flow *flow = NULL; struct datapath *dp; struct sw_flow_match match; struct sw_flow_id ufid; u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int err; bool log = !a[OVS_FLOW_ATTR_PROBE]; bool ufid_present; ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(&match, &key, true, NULL); err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (unlikely(err)) return err; } ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (unlikely(!dp)) { err = -ENODEV; goto unlock; } if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) { err = ovs_flow_tbl_flush(&dp->table); goto unlock; } if (ufid_present) flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); else flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { err = -ENOENT; goto unlock; } ovs_flow_tbl_remove(&dp->table, flow); ovs_unlock(); reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts, &flow->id, info, false, ufid_flags); if (likely(reply)) { if (!IS_ERR(reply)) { rcu_read_lock(); /*To keep RCU checker happy. */ err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL, ufid_flags); rcu_read_unlock(); if (WARN_ON_ONCE(err < 0)) { kfree_skb(reply); goto out_free; } ovs_notify(&dp_flow_genl_family, reply, info); } else { netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply)); } } out_free: ovs_flow_free(flow, true); return 0; unlock: ovs_unlock(); return err; } static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct nlattr *a[__OVS_FLOW_ATTR_MAX]; struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); struct table_instance *ti; struct datapath *dp; u32 ufid_flags; int err; err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a, OVS_FLOW_ATTR_MAX, flow_policy, NULL); if (err) return err; ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); rcu_read_lock(); dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) { rcu_read_unlock(); return -ENODEV; } ti = rcu_dereference(dp->table.ti); for (;;) { struct sw_flow *flow; u32 bucket, obj; bucket = cb->args[0]; obj = cb->args[1]; flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj); if (!flow) break; if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_FLOW_CMD_GET, ufid_flags) < 0) break; cb->args[0] = bucket; cb->args[1] = obj; } rcu_read_unlock(); return skb->len; } static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG }, [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 }, [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 }, }; static const struct genl_small_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_flow_cmd_new }, { .cmd = OVS_FLOW_CMD_DEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_flow_cmd_del }, { .cmd = OVS_FLOW_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ .doit = ovs_flow_cmd_get, .dumpit = ovs_flow_cmd_dump }, { .cmd = OVS_FLOW_CMD_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_flow_cmd_set, }, }; static struct genl_family dp_flow_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, .maxattr = OVS_FLOW_ATTR_MAX, .policy = flow_policy, .netnsok = true, .parallel_ops = true, .small_ops = dp_flow_genl_ops, .n_small_ops = ARRAY_SIZE(dp_flow_genl_ops), .resv_start_op = OVS_FLOW_CMD_SET + 1, .mcgrps = &ovs_dp_flow_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, }; static size_t ovs_dp_cmd_msg_size(void) { size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header)); msgsize += nla_total_size(IFNAMSIZ); msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats)); msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats)); msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */ msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */ msgsize += nla_total_size(sizeof(u32) * nr_cpu_ids); /* OVS_DP_ATTR_PER_CPU_PIDS */ return msgsize; } /* Called with ovs_mutex. */ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_dp_stats dp_stats; struct ovs_dp_megaflow_stats dp_megaflow_stats; struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids); int err, pids_len; ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, flags, cmd); if (!ovs_header) goto error; ovs_header->dp_ifindex = get_dpifindex(dp); err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp)); if (err) goto nla_put_failure; get_dp_stats(dp, &dp_stats, &dp_megaflow_stats); if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats, OVS_DP_ATTR_PAD)) goto nla_put_failure; if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS, sizeof(struct ovs_dp_megaflow_stats), &dp_megaflow_stats, OVS_DP_ATTR_PAD)) goto nla_put_failure; if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) goto nla_put_failure; if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE, ovs_flow_tbl_masks_cache_size(&dp->table))) goto nla_put_failure; if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) { pids_len = min(pids->n_pids, nr_cpu_ids) * sizeof(u32); if (nla_put(skb, OVS_DP_ATTR_PER_CPU_PIDS, pids_len, &pids->pids)) goto nla_put_failure; } genlmsg_end(skb, ovs_header); return 0; nla_put_failure: genlmsg_cancel(skb, ovs_header); error: return -EMSGSIZE; } static struct sk_buff *ovs_dp_cmd_alloc_info(void) { return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL); } /* Called with rcu_read_lock or ovs_mutex. */ static struct datapath *lookup_datapath(struct net *net, const struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1]) { struct datapath *dp; if (!a[OVS_DP_ATTR_NAME]) dp = get_dp(net, ovs_header->dp_ifindex); else { struct vport *vport; vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; } return dp ? dp : ERR_PTR(-ENODEV); } static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info) { struct datapath *dp; dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info), info->attrs); if (IS_ERR(dp)) return; pr_warn("%s: Dropping previously announced user features\n", ovs_dp_name(dp)); dp->user_features = 0; } static int ovs_dp_set_upcall_portids(struct datapath *dp, const struct nlattr *ids) { struct dp_nlsk_pids *old, *dp_nlsk_pids; if (!nla_len(ids) || nla_len(ids) % sizeof(u32)) return -EINVAL; old = ovsl_dereference(dp->upcall_portids); dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids), GFP_KERNEL); if (!dp_nlsk_pids) return -ENOMEM; dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32); nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids)); rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids); kfree_rcu(old, rcu); return 0; } u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id) { struct dp_nlsk_pids *dp_nlsk_pids; dp_nlsk_pids = rcu_dereference(dp->upcall_portids); if (dp_nlsk_pids) { if (cpu_id < dp_nlsk_pids->n_pids) { return dp_nlsk_pids->pids[cpu_id]; } else if (dp_nlsk_pids->n_pids > 0 && cpu_id >= dp_nlsk_pids->n_pids) { /* If the number of netlink PIDs is mismatched with * the number of CPUs as seen by the kernel, log this * and send the upcall to an arbitrary socket (0) in * order to not drop packets */ pr_info_ratelimited("cpu_id mismatch with handler threads"); return dp_nlsk_pids->pids[cpu_id % dp_nlsk_pids->n_pids]; } else { return 0; } } else { return 0; } } static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) { u32 user_features = 0, old_features = dp->user_features; int err; if (a[OVS_DP_ATTR_USER_FEATURES]) { user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); if (user_features & ~(OVS_DP_F_VPORT_PIDS | OVS_DP_F_UNALIGNED | OVS_DP_F_TC_RECIRC_SHARING | OVS_DP_F_DISPATCH_UPCALL_PER_CPU)) return -EOPNOTSUPP; #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) if (user_features & OVS_DP_F_TC_RECIRC_SHARING) return -EOPNOTSUPP; #endif } if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) { int err; u32 cache_size; cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]); err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size); if (err) return err; } dp->user_features = user_features; if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && a[OVS_DP_ATTR_PER_CPU_PIDS]) { /* Upcall Netlink Port IDs have been updated */ err = ovs_dp_set_upcall_portids(dp, a[OVS_DP_ATTR_PER_CPU_PIDS]); if (err) return err; } if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) && !(old_features & OVS_DP_F_TC_RECIRC_SHARING)) tc_skb_ext_tc_enable(); else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) && (old_features & OVS_DP_F_TC_RECIRC_SHARING)) tc_skb_ext_tc_disable(); return 0; } static int ovs_dp_stats_init(struct datapath *dp) { dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); if (!dp->stats_percpu) return -ENOMEM; return 0; } static int ovs_dp_vport_init(struct datapath *dp) { int i; dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS, sizeof(struct hlist_head), GFP_KERNEL); if (!dp->ports) return -ENOMEM; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) INIT_HLIST_HEAD(&dp->ports[i]); return 0; } static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct vport_parms parms; struct sk_buff *reply; struct datapath *dp; struct vport *vport; struct ovs_net *ovs_net; int err; err = -EINVAL; if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) goto err; reply = ovs_dp_cmd_alloc_info(); if (!reply) return -ENOMEM; err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) goto err_destroy_reply; ovs_dp_set_net(dp, sock_net(skb->sk)); /* Allocate table. */ err = ovs_flow_tbl_init(&dp->table); if (err) goto err_destroy_dp; err = ovs_dp_stats_init(dp); if (err) goto err_destroy_table; err = ovs_dp_vport_init(dp); if (err) goto err_destroy_stats; err = ovs_meters_init(dp); if (err) goto err_destroy_ports; /* Set up our datapath device. */ parms.name = nla_data(a[OVS_DP_ATTR_NAME]); parms.type = OVS_VPORT_TYPE_INTERNAL; parms.options = NULL; parms.dp = dp; parms.port_no = OVSP_LOCAL; parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; parms.desired_ifindex = nla_get_s32_default(a[OVS_DP_ATTR_IFINDEX], 0); /* So far only local changes have been made, now need the lock. */ ovs_lock(); err = ovs_dp_change(dp, a); if (err) goto err_unlock_and_destroy_meters; vport = new_vport(&parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); if (err == -EBUSY) err = -EEXIST; if (err == -EEXIST) { /* An outdated user space instance that does not understand * the concept of user_features has attempted to create a new * datapath and is likely to reuse it. Drop all user features. */ if (info->genlhdr->version < OVS_DP_VER_FEATURES) ovs_dp_reset_user_features(skb, info); } goto err_destroy_portids; } err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_NEW); BUG_ON(err < 0); ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); list_add_tail_rcu(&dp->list_node, &ovs_net->dps); ovs_unlock(); ovs_notify(&dp_datapath_genl_family, reply, info); return 0; err_destroy_portids: kfree(rcu_dereference_raw(dp->upcall_portids)); err_unlock_and_destroy_meters: ovs_unlock(); ovs_meters_exit(dp); err_destroy_ports: kfree(dp->ports); err_destroy_stats: free_percpu(dp->stats_percpu); err_destroy_table: ovs_flow_tbl_destroy(&dp->table); err_destroy_dp: kfree(dp); err_destroy_reply: kfree_skb(reply); err: return err; } /* Called with ovs_mutex. */ static void __dp_destroy(struct datapath *dp) { struct flow_table *table = &dp->table; int i; if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) tc_skb_ext_tc_disable(); for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; struct hlist_node *n; hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) if (vport->port_no != OVSP_LOCAL) ovs_dp_detach_port(vport); } list_del_rcu(&dp->list_node); /* OVSP_LOCAL is datapath internal port. We need to make sure that * all ports in datapath are destroyed first before freeing datapath. */ ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); /* Flush sw_flow in the tables. RCU cb only releases resource * such as dp, ports and tables. That may avoid some issues * such as RCU usage warning. */ table_instance_flow_flush(table, ovsl_dereference(table->ti), ovsl_dereference(table->ufid_ti)); /* RCU destroy the ports, meters and flow tables. */ call_rcu(&dp->rcu, destroy_dp_rcu); } static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *reply; struct datapath *dp; int err; reply = ovs_dp_cmd_alloc_info(); if (!reply) return -ENOMEM; ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info), info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) goto err_unlock_free; err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_DEL); BUG_ON(err < 0); __dp_destroy(dp); ovs_unlock(); ovs_notify(&dp_datapath_genl_family, reply, info); return 0; err_unlock_free: ovs_unlock(); kfree_skb(reply); return err; } static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *reply; struct datapath *dp; int err; reply = ovs_dp_cmd_alloc_info(); if (!reply) return -ENOMEM; ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info), info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) goto err_unlock_free; err = ovs_dp_change(dp, info->attrs); if (err) goto err_unlock_free; err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_SET); BUG_ON(err < 0); ovs_unlock(); ovs_notify(&dp_datapath_genl_family, reply, info); return 0; err_unlock_free: ovs_unlock(); kfree_skb(reply); return err; } static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *reply; struct datapath *dp; int err; reply = ovs_dp_cmd_alloc_info(); if (!reply) return -ENOMEM; ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info), info->attrs); if (IS_ERR(dp)) { err = PTR_ERR(dp); goto err_unlock_free; } err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_GET); BUG_ON(err < 0); ovs_unlock(); return genlmsg_reply(reply, info); err_unlock_free: ovs_unlock(); kfree_skb(reply); return err; } static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id); struct datapath *dp; int skip = cb->args[0]; int i = 0; ovs_lock(); list_for_each_entry(dp, &ovs_net->dps, list_node) { if (i >= skip && ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_DP_CMD_GET) < 0) break; i++; } ovs_unlock(); cb->args[0] = i; return skb->len; } static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, [OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0, PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)), [OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0), }; static const struct genl_small_ops dp_datapath_genl_ops[] = { { .cmd = OVS_DP_CMD_NEW, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_dp_cmd_new }, { .cmd = OVS_DP_CMD_DEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_dp_cmd_del }, { .cmd = OVS_DP_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ .doit = ovs_dp_cmd_get, .dumpit = ovs_dp_cmd_dump }, { .cmd = OVS_DP_CMD_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_dp_cmd_set, }, }; static struct genl_family dp_datapath_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, .maxattr = OVS_DP_ATTR_MAX, .policy = datapath_policy, .netnsok = true, .parallel_ops = true, .small_ops = dp_datapath_genl_ops, .n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops), .resv_start_op = OVS_DP_CMD_SET + 1, .mcgrps = &ovs_dp_datapath_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, }; /* Called with ovs_mutex or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, struct net *net, u32 portid, u32 seq, u32 flags, u8 cmd, gfp_t gfp) { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; struct net *net_vport; int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; ovs_header->dp_ifindex = get_dpifindex(vport->dp); if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || nla_put_string(skb, OVS_VPORT_ATTR_NAME, ovs_vport_name(vport)) || nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex)) goto nla_put_failure; rcu_read_lock(); net_vport = dev_net_rcu(vport->dev); if (!net_eq(net, net_vport)) { int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC); if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) goto nla_put_failure_unlock; } rcu_read_unlock(); ovs_vport_get_stats(vport, &vport_stats); if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), &vport_stats, OVS_VPORT_ATTR_PAD)) goto nla_put_failure; if (ovs_vport_get_upcall_stats(vport, skb)) goto nla_put_failure; if (ovs_vport_get_upcall_portids(vport, skb)) goto nla_put_failure; err = ovs_vport_get_options(vport, skb); if (err == -EMSGSIZE) goto error; genlmsg_end(skb, ovs_header); return 0; nla_put_failure_unlock: rcu_read_unlock(); nla_put_failure: err = -EMSGSIZE; error: genlmsg_cancel(skb, ovs_header); return err; } static struct sk_buff *ovs_vport_cmd_alloc_info(void) { return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); } /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; int retval; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd, GFP_KERNEL); BUG_ON(retval < 0); return skb; } /* Called with ovs_mutex or RCU read lock. */ static struct vport *lookup_vport(struct net *net, const struct ovs_header *ovs_header, struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) { struct datapath *dp; struct vport *vport; if (a[OVS_VPORT_ATTR_IFINDEX]) return ERR_PTR(-EOPNOTSUPP); if (a[OVS_VPORT_ATTR_NAME]) { vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); if (!vport) return ERR_PTR(-ENODEV); if (ovs_header->dp_ifindex && ovs_header->dp_ifindex != get_dpifindex(vport->dp)) return ERR_PTR(-ENODEV); return vport; } else if (a[OVS_VPORT_ATTR_PORT_NO]) { u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); if (port_no >= DP_MAX_PORTS) return ERR_PTR(-EFBIG); dp = get_dp(net, ovs_header->dp_ifindex); if (!dp) return ERR_PTR(-ENODEV); vport = ovs_vport_ovsl_rcu(dp, port_no); if (!vport) return ERR_PTR(-ENODEV); return vport; } else return ERR_PTR(-EINVAL); } static unsigned int ovs_get_max_headroom(struct datapath *dp) { unsigned int dev_headroom, max_headroom = 0; struct net_device *dev; struct vport *vport; int i; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, lockdep_ovsl_is_held()) { dev = vport->dev; dev_headroom = netdev_get_fwd_headroom(dev); if (dev_headroom > max_headroom) max_headroom = dev_headroom; } } return max_headroom; } /* Called with ovs_mutex */ static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom) { struct vport *vport; int i; dp->max_headroom = new_headroom; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, lockdep_ovsl_is_held()) netdev_set_rx_headroom(vport->dev, new_headroom); } } static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = genl_info_userhdr(info); struct vport_parms parms; struct sk_buff *reply; struct vport *vport; struct datapath *dp; unsigned int new_headroom; u32 port_no; int err; if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || !a[OVS_VPORT_ATTR_UPCALL_PID]) return -EINVAL; parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]); if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL) return -EOPNOTSUPP; port_no = nla_get_u32_default(a[OVS_VPORT_ATTR_PORT_NO], 0); if (port_no >= DP_MAX_PORTS) return -EFBIG; reply = ovs_vport_cmd_alloc_info(); if (!reply) return -ENOMEM; ovs_lock(); restart: dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) goto exit_unlock_free; if (port_no) { vport = ovs_vport_ovsl(dp, port_no); err = -EBUSY; if (vport) goto exit_unlock_free; } else { for (port_no = 1; ; port_no++) { if (port_no >= DP_MAX_PORTS) { err = -EFBIG; goto exit_unlock_free; } vport = ovs_vport_ovsl(dp, port_no); if (!vport) break; } } parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]); parms.options = a[OVS_VPORT_ATTR_OPTIONS]; parms.dp = dp; parms.port_no = port_no; parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID]; parms.desired_ifindex = nla_get_s32_default(a[OVS_VPORT_ATTR_IFINDEX], 0); vport = new_vport(&parms); err = PTR_ERR(vport); if (IS_ERR(vport)) { if (err == -EAGAIN) goto restart; goto exit_unlock_free; } err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, OVS_VPORT_CMD_NEW, GFP_KERNEL); new_headroom = netdev_get_fwd_headroom(vport->dev); if (new_headroom > dp->max_headroom) ovs_update_headroom(dp, new_headroom); else netdev_set_rx_headroom(vport->dev, dp->max_headroom); BUG_ON(err < 0); ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); return 0; exit_unlock_free: ovs_unlock(); kfree_skb(reply); return err; } static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct sk_buff *reply; struct vport *vport; int err; reply = ovs_vport_cmd_alloc_info(); if (!reply) return -ENOMEM; ovs_lock(); vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock_free; if (a[OVS_VPORT_ATTR_TYPE] && nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { err = -EINVAL; goto exit_unlock_free; } if (a[OVS_VPORT_ATTR_OPTIONS]) { err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); if (err) goto exit_unlock_free; } if (a[OVS_VPORT_ATTR_UPCALL_PID]) { struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID]; err = ovs_vport_set_upcall_portids(vport, ids); if (err) goto exit_unlock_free; } err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, OVS_VPORT_CMD_SET, GFP_KERNEL); BUG_ON(err < 0); ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); return 0; exit_unlock_free: ovs_unlock(); kfree_skb(reply); return err; } static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) { bool update_headroom = false; struct nlattr **a = info->attrs; struct sk_buff *reply; struct datapath *dp; struct vport *vport; unsigned int new_headroom; int err; reply = ovs_vport_cmd_alloc_info(); if (!reply) return -ENOMEM; ovs_lock(); vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock_free; if (vport->port_no == OVSP_LOCAL) { err = -EINVAL; goto exit_unlock_free; } err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, OVS_VPORT_CMD_DEL, GFP_KERNEL); BUG_ON(err < 0); /* the vport deletion may trigger dp headroom update */ dp = vport->dp; if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom) update_headroom = true; netdev_reset_rx_headroom(vport->dev); ovs_dp_detach_port(vport); if (update_headroom) { new_headroom = ovs_get_max_headroom(dp); if (new_headroom < dp->max_headroom) ovs_update_headroom(dp, new_headroom); } ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); return 0; exit_unlock_free: ovs_unlock(); kfree_skb(reply); return err; } static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = genl_info_userhdr(info); struct sk_buff *reply; struct vport *vport; int err; reply = ovs_vport_cmd_alloc_info(); if (!reply) return -ENOMEM; rcu_read_lock(); vport = lookup_vport(sock_net(skb->sk), ovs_header, a); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock_free; err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, OVS_VPORT_CMD_GET, GFP_ATOMIC); BUG_ON(err < 0); rcu_read_unlock(); return genlmsg_reply(reply, info); exit_unlock_free: rcu_read_unlock(); kfree_skb(reply); return err; } static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); struct datapath *dp; int bucket = cb->args[0], skip = cb->args[1]; int i, j = 0; rcu_read_lock(); dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) { rcu_read_unlock(); return -ENODEV; } for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; j = 0; hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { if (j >= skip && ovs_vport_cmd_fill_info(vport, skb, sock_net(skb->sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_VPORT_CMD_GET, GFP_ATOMIC) < 0) goto out; j++; } skip = 0; } out: rcu_read_unlock(); cb->args[0] = i; cb->args[1] = j; return skb->len; } static void ovs_dp_masks_rebalance(struct work_struct *work) { struct ovs_net *ovs_net = container_of(work, struct ovs_net, masks_rebalance.work); struct datapath *dp; ovs_lock(); list_for_each_entry(dp, &ovs_net->dps, list_node) ovs_flow_masks_rebalance(&dp->table); ovs_unlock(); schedule_delayed_work(&ovs_net->masks_rebalance, msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); } static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC }, [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, [OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0), [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 }, [OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED }, }; static const struct genl_small_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_vport_cmd_new }, { .cmd = OVS_VPORT_CMD_DEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_vport_cmd_del }, { .cmd = OVS_VPORT_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ .doit = ovs_vport_cmd_get, .dumpit = ovs_vport_cmd_dump }, { .cmd = OVS_VPORT_CMD_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_vport_cmd_set, }, }; struct genl_family dp_vport_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, .maxattr = OVS_VPORT_ATTR_MAX, .policy = vport_policy, .netnsok = true, .parallel_ops = true, .small_ops = dp_vport_genl_ops, .n_small_ops = ARRAY_SIZE(dp_vport_genl_ops), .resv_start_op = OVS_VPORT_CMD_SET + 1, .mcgrps = &ovs_dp_vport_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, }; static struct genl_family * const dp_genl_families[] = { &dp_datapath_genl_family, &dp_vport_genl_family, &dp_flow_genl_family, &dp_packet_genl_family, &dp_meter_genl_family, #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) &dp_ct_limit_genl_family, #endif }; static void dp_unregister_genl(int n_families) { int i; for (i = 0; i < n_families; i++) genl_unregister_family(dp_genl_families[i]); } static int __init dp_register_genl(void) { int err; int i; for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { err = genl_register_family(dp_genl_families[i]); if (err) goto error; } return 0; error: dp_unregister_genl(i); return err; } static int __net_init ovs_init_net(struct net *net) { struct ovs_net *ovs_net = net_generic(net, ovs_net_id); int err; INIT_LIST_HEAD(&ovs_net->dps); INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq); INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance); err = ovs_ct_init(net); if (err) return err; schedule_delayed_work(&ovs_net->masks_rebalance, msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); return 0; } static void __net_exit list_vports_from_net(struct net *net, struct net *dnet, struct list_head *head) { struct ovs_net *ovs_net = net_generic(net, ovs_net_id); struct datapath *dp; list_for_each_entry(dp, &ovs_net->dps, list_node) { int i; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) { if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL) continue; if (dev_net(vport->dev) == dnet) list_add(&vport->detach_list, head); } } } } static void __net_exit ovs_exit_net(struct net *dnet) { struct datapath *dp, *dp_next; struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id); struct vport *vport, *vport_next; struct net *net; LIST_HEAD(head); ovs_lock(); ovs_ct_exit(dnet); list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) __dp_destroy(dp); down_read(&net_rwsem); for_each_net(net) list_vports_from_net(net, dnet, &head); up_read(&net_rwsem); /* Detach all vports from given namespace. */ list_for_each_entry_safe(vport, vport_next, &head, detach_list) { list_del(&vport->detach_list); ovs_dp_detach_port(vport); } ovs_unlock(); cancel_delayed_work_sync(&ovs_net->masks_rebalance); cancel_work_sync(&ovs_net->dp_notify_work); } static struct pernet_operations ovs_net_ops = { .init = ovs_init_net, .exit = ovs_exit_net, .id = &ovs_net_id, .size = sizeof(struct ovs_net), }; static const char * const ovs_drop_reasons[] = { #define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x), OVS_DROP_REASONS(S) #undef S }; static struct drop_reason_list drop_reason_list_ovs = { .reasons = ovs_drop_reasons, .n_reasons = ARRAY_SIZE(ovs_drop_reasons), }; static int __init ovs_alloc_percpu_storage(void) { unsigned int cpu; ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage); if (!ovs_pcpu_storage) return -ENOMEM; for_each_possible_cpu(cpu) { struct ovs_pcpu_storage *ovs_pcpu; ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu); local_lock_init(&ovs_pcpu->bh_lock); } return 0; } static void ovs_free_percpu_storage(void) { free_percpu(ovs_pcpu_storage); } static int __init dp_init(void) { int err; BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof_field(struct sk_buff, cb)); pr_info("Open vSwitch switching datapath\n"); err = ovs_alloc_percpu_storage(); if (err) goto error; err = ovs_internal_dev_rtnl_link_register(); if (err) goto error; err = ovs_flow_init(); if (err) goto error_unreg_rtnl_link; err = ovs_vport_init(); if (err) goto error_flow_exit; err = register_pernet_device(&ovs_net_ops); if (err) goto error_vport_exit; err = register_netdevice_notifier(&ovs_dp_device_notifier); if (err) goto error_netns_exit; err = ovs_netdev_init(); if (err) goto error_unreg_notifier; err = dp_register_genl(); if (err < 0) goto error_unreg_netdev; drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH, &drop_reason_list_ovs); return 0; error_unreg_netdev: ovs_netdev_exit(); error_unreg_notifier: unregister_netdevice_notifier(&ovs_dp_device_notifier); error_netns_exit: unregister_pernet_device(&ovs_net_ops); error_vport_exit: ovs_vport_exit(); error_flow_exit: ovs_flow_exit(); error_unreg_rtnl_link: ovs_internal_dev_rtnl_link_unregister(); error: ovs_free_percpu_storage(); return err; } static void dp_cleanup(void) { dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); ovs_netdev_exit(); unregister_netdevice_notifier(&ovs_dp_device_notifier); unregister_pernet_device(&ovs_net_ops); drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH); rcu_barrier(); ovs_vport_exit(); ovs_flow_exit(); ovs_internal_dev_rtnl_link_unregister(); ovs_free_percpu_storage(); } module_init(dp_init); module_exit(dp_cleanup); MODULE_DESCRIPTION("Open vSwitch switching datapath"); MODULE_LICENSE("GPL"); MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 // SPDX-License-Identifier: GPL-2.0+ /* * Driver for SanDisk SDDR-55 SmartMedia reader * * SDDR55 driver v0.1: * * First release * * Current development and maintenance by: * (c) 2002 Simon Munton */ #include <linux/jiffies.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/slab.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include "usb.h" #include "transport.h" #include "protocol.h" #include "debug.h" #include "scsiglue.h" #define DRV_NAME "ums-sddr55" MODULE_DESCRIPTION("Driver for SanDisk SDDR-55 SmartMedia reader"); MODULE_AUTHOR("Simon Munton"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("USB_STORAGE"); /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ { USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags) } static const struct usb_device_id sddr55_usb_ids[] = { # include "unusual_sddr55.h" { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, sddr55_usb_ids); #undef UNUSUAL_DEV /* * The flags table */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } static const struct us_unusual_dev sddr55_unusual_dev_list[] = { # include "unusual_sddr55.h" { } /* Terminating entry */ }; #undef UNUSUAL_DEV #define short_pack(lsb,msb) ( ((u16)(lsb)) | ( ((u16)(msb))<<8 ) ) #define LSB_of(s) ((s)&0xFF) #define MSB_of(s) ((s)>>8) #define PAGESIZE 512 #define set_sense_info(sk, asc, ascq) \ do { \ info->sense_data[2] = sk; \ info->sense_data[12] = asc; \ info->sense_data[13] = ascq; \ } while (0) struct sddr55_card_info { unsigned long capacity; /* Size of card in bytes */ int max_log_blks; /* maximum number of logical blocks */ int pageshift; /* log2 of pagesize */ int smallpageshift; /* 1 if pagesize == 256 */ int blocksize; /* Size of block in pages */ int blockshift; /* log2 of blocksize */ int blockmask; /* 2^blockshift - 1 */ int read_only; /* non zero if card is write protected */ int force_read_only; /* non zero if we find a map error*/ int *lba_to_pba; /* logical to physical map */ int *pba_to_lba; /* physical to logical map */ int fatal_error; /* set if we detect something nasty */ unsigned long last_access; /* number of jiffies since we last talked to device */ unsigned char sense_data[18]; }; #define NOT_ALLOCATED 0xffffffff #define BAD_BLOCK 0xffff #define CIS_BLOCK 0x400 #define UNUSED_BLOCK 0x3ff static int sddr55_bulk_transport(struct us_data *us, int direction, unsigned char *data, unsigned int len) { struct sddr55_card_info *info = (struct sddr55_card_info *)us->extra; unsigned int pipe = (direction == DMA_FROM_DEVICE) ? us->recv_bulk_pipe : us->send_bulk_pipe; if (!len) return USB_STOR_XFER_GOOD; info->last_access = jiffies; return usb_stor_bulk_transfer_buf(us, pipe, data, len, NULL); } /* * check if card inserted, if there is, update read_only status * return non zero if no card */ static int sddr55_status(struct us_data *us) { int result; unsigned char *command = us->iobuf; unsigned char *status = us->iobuf; struct sddr55_card_info *info = (struct sddr55_card_info *)us->extra; /* send command */ memset(command, 0, 8); command[5] = 0xB0; command[7] = 0x80; result = sddr55_bulk_transport(us, DMA_TO_DEVICE, command, 8); usb_stor_dbg(us, "Result for send_command in status %d\n", result); if (result != USB_STOR_XFER_GOOD) { set_sense_info (4, 0, 0); /* hardware error */ return USB_STOR_TRANSPORT_ERROR; } result = sddr55_bulk_transport(us, DMA_FROM_DEVICE, status, 4); /* expect to get short transfer if no card fitted */ if (result == USB_STOR_XFER_SHORT || result == USB_STOR_XFER_STALLED) { /* had a short transfer, no card inserted, free map memory */ kfree(info->lba_to_pba); kfree(info->pba_to_lba); info->lba_to_pba = NULL; info->pba_to_lba = NULL; info->fatal_error = 0; info->force_read_only = 0; set_sense_info (2, 0x3a, 0); /* not ready, medium not present */ return USB_STOR_TRANSPORT_FAILED; } if (result != USB_STOR_XFER_GOOD) { set_sense_info (4, 0, 0); /* hardware error */ return USB_STOR_TRANSPORT_FAILED; } /* check write protect status */ info->read_only = (status[0] & 0x20); /* now read status */ result = sddr55_bulk_transport(us, DMA_FROM_DEVICE, status, 2); if (result != USB_STOR_XFER_GOOD) { set_sense_info (4, 0, 0); /* hardware error */ } return (result == USB_STOR_XFER_GOOD ? USB_STOR_TRANSPORT_GOOD : USB_STOR_TRANSPORT_FAILED); } static int sddr55_read_data(struct us_data *us, unsigned int lba, unsigned int page, unsigned short sectors) { int result = USB_STOR_TRANSPORT_GOOD; unsigned char *command = us->iobuf; unsigned char *status = us->iobuf; struct sddr55_card_info *info = (struct sddr55_card_info *)us->extra; unsigned char *buffer; unsigned int pba; unsigned int address; unsigned short pages; unsigned int len, offset; struct scatterlist *sg; // Since we only read in one block at a time, we have to create // a bounce buffer and move the data a piece at a time between the // bounce buffer and the actual transfer buffer. len = min_t(unsigned int, sectors, info->blocksize >> info->smallpageshift) * PAGESIZE; buffer = kmalloc(len, GFP_NOIO); if (buffer == NULL) return USB_STOR_TRANSPORT_ERROR; /* out of memory */ offset = 0; sg = NULL; while (sectors>0) { /* have we got to end? */ if (lba >= info->max_log_blks) break; pba = info->lba_to_pba[lba]; // Read as many sectors as possible in this block pages = min_t(unsigned int, sectors << info->smallpageshift, info->blocksize - page); len = pages << info->pageshift; usb_stor_dbg(us, "Read %02X pages, from PBA %04X (LBA %04X) page %02X\n", pages, pba, lba, page); if (pba == NOT_ALLOCATED) { /* no pba for this lba, fill with zeroes */ memset (buffer, 0, len); } else { address = (pba << info->blockshift) + page; command[0] = 0; command[1] = LSB_of(address>>16); command[2] = LSB_of(address>>8); command[3] = LSB_of(address); command[4] = 0; command[5] = 0xB0; command[6] = LSB_of(pages << (1 - info->smallpageshift)); command[7] = 0x85; /* send command */ result = sddr55_bulk_transport(us, DMA_TO_DEVICE, command, 8); usb_stor_dbg(us, "Result for send_command in read_data %d\n", result); if (result != USB_STOR_XFER_GOOD) { result = USB_STOR_TRANSPORT_ERROR; goto leave; } /* read data */ result = sddr55_bulk_transport(us, DMA_FROM_DEVICE, buffer, len); if (result != USB_STOR_XFER_GOOD) { result = USB_STOR_TRANSPORT_ERROR; goto leave; } /* now read status */ result = sddr55_bulk_transport(us, DMA_FROM_DEVICE, status, 2); if (result != USB_STOR_XFER_GOOD) { result = USB_STOR_TRANSPORT_ERROR; goto leave; } /* check status for error */ if (status[0] == 0xff && status[1] == 0x4) { set_sense_info (3, 0x11, 0); result = USB_STOR_TRANSPORT_FAILED; goto leave; } } // Store the data in the transfer buffer usb_stor_access_xfer_buf(buffer, len, us->srb, &sg, &offset, TO_XFER_BUF); page = 0; lba++; sectors -= pages >> info->smallpageshift; } result = USB_STOR_TRANSPORT_GOOD; leave: kfree(buffer); return result; } static int sddr55_write_data(struct us_data *us, unsigned int lba, unsigned int page, unsigned short sectors) { int result = USB_STOR_TRANSPORT_GOOD; unsigned char *command = us->iobuf; unsigned char *status = us->iobuf; struct sddr55_card_info *info = (struct sddr55_card_info *)us->extra; unsigned char *buffer; unsigned int pba; unsigned int new_pba; unsigned int address; unsigned short pages; int i; unsigned int len, offset; struct scatterlist *sg; /* check if we are allowed to write */ if (info->read_only || info->force_read_only) { set_sense_info (7, 0x27, 0); /* read only */ return USB_STOR_TRANSPORT_FAILED; } // Since we only write one block at a time, we have to create // a bounce buffer and move the data a piece at a time between the // bounce buffer and the actual transfer buffer. len = min_t(unsigned int, sectors, info->blocksize >> info->smallpageshift) * PAGESIZE; buffer = kmalloc(len, GFP_NOIO); if (buffer == NULL) return USB_STOR_TRANSPORT_ERROR; offset = 0; sg = NULL; while (sectors > 0) { /* have we got to end? */ if (lba >= info->max_log_blks) break; pba = info->lba_to_pba[lba]; // Write as many sectors as possible in this block pages = min_t(unsigned int, sectors << info->smallpageshift, info->blocksize - page); len = pages << info->pageshift; // Get the data from the transfer buffer usb_stor_access_xfer_buf(buffer, len, us->srb, &sg, &offset, FROM_XFER_BUF); usb_stor_dbg(us, "Write %02X pages, to PBA %04X (LBA %04X) page %02X\n", pages, pba, lba, page); command[4] = 0; if (pba == NOT_ALLOCATED) { /* no pba allocated for this lba, find a free pba to use */ int max_pba = (info->max_log_blks / 250 ) * 256; int found_count = 0; int found_pba = -1; /* set pba to first block in zone lba is in */ pba = (lba / 1000) * 1024; usb_stor_dbg(us, "No PBA for LBA %04X\n", lba); if (max_pba > 1024) max_pba = 1024; /* * Scan through the map looking for an unused block * leave 16 unused blocks at start (or as many as * possible) since the sddr55 seems to reuse a used * block when it shouldn't if we don't leave space. */ for (i = 0; i < max_pba; i++, pba++) { if (info->pba_to_lba[pba] == UNUSED_BLOCK) { found_pba = pba; if (found_count++ > 16) break; } } pba = found_pba; if (pba == -1) { /* oh dear */ usb_stor_dbg(us, "Couldn't find unallocated block\n"); set_sense_info (3, 0x31, 0); /* medium error */ result = USB_STOR_TRANSPORT_FAILED; goto leave; } usb_stor_dbg(us, "Allocating PBA %04X for LBA %04X\n", pba, lba); /* set writing to unallocated block flag */ command[4] = 0x40; } address = (pba << info->blockshift) + page; command[1] = LSB_of(address>>16); command[2] = LSB_of(address>>8); command[3] = LSB_of(address); /* set the lba into the command, modulo 1000 */ command[0] = LSB_of(lba % 1000); command[6] = MSB_of(lba % 1000); command[4] |= LSB_of(pages >> info->smallpageshift); command[5] = 0xB0; command[7] = 0x86; /* send command */ result = sddr55_bulk_transport(us, DMA_TO_DEVICE, command, 8); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Result for send_command in write_data %d\n", result); /* set_sense_info is superfluous here? */ set_sense_info (3, 0x3, 0);/* peripheral write error */ result = USB_STOR_TRANSPORT_FAILED; goto leave; } /* send the data */ result = sddr55_bulk_transport(us, DMA_TO_DEVICE, buffer, len); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Result for send_data in write_data %d\n", result); /* set_sense_info is superfluous here? */ set_sense_info (3, 0x3, 0);/* peripheral write error */ result = USB_STOR_TRANSPORT_FAILED; goto leave; } /* now read status */ result = sddr55_bulk_transport(us, DMA_FROM_DEVICE, status, 6); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Result for get_status in write_data %d\n", result); /* set_sense_info is superfluous here? */ set_sense_info (3, 0x3, 0);/* peripheral write error */ result = USB_STOR_TRANSPORT_FAILED; goto leave; } new_pba = (status[3] + (status[4] << 8) + (status[5] << 16)) >> info->blockshift; /* check status for error */ if (status[0] == 0xff && status[1] == 0x4) { info->pba_to_lba[new_pba] = BAD_BLOCK; set_sense_info (3, 0x0c, 0); result = USB_STOR_TRANSPORT_FAILED; goto leave; } usb_stor_dbg(us, "Updating maps for LBA %04X: old PBA %04X, new PBA %04X\n", lba, pba, new_pba); /* update the lba<->pba maps, note new_pba might be the same as pba */ info->lba_to_pba[lba] = new_pba; info->pba_to_lba[pba] = UNUSED_BLOCK; /* check that new_pba wasn't already being used */ if (info->pba_to_lba[new_pba] != UNUSED_BLOCK) { printk(KERN_ERR "sddr55 error: new PBA %04X already in use for LBA %04X\n", new_pba, info->pba_to_lba[new_pba]); info->fatal_error = 1; set_sense_info (3, 0x31, 0); result = USB_STOR_TRANSPORT_FAILED; goto leave; } /* update the pba<->lba maps for new_pba */ info->pba_to_lba[new_pba] = lba % 1000; page = 0; lba++; sectors -= pages >> info->smallpageshift; } result = USB_STOR_TRANSPORT_GOOD; leave: kfree(buffer); return result; } static int sddr55_read_deviceID(struct us_data *us, unsigned char *manufacturerID, unsigned char *deviceID) { int result; unsigned char *command = us->iobuf; unsigned char *content = us->iobuf; memset(command, 0, 8); command[5] = 0xB0; command[7] = 0x84; result = sddr55_bulk_transport(us, DMA_TO_DEVICE, command, 8); usb_stor_dbg(us, "Result of send_control for device ID is %d\n", result); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; result = sddr55_bulk_transport(us, DMA_FROM_DEVICE, content, 4); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; *manufacturerID = content[0]; *deviceID = content[1]; if (content[0] != 0xff) { result = sddr55_bulk_transport(us, DMA_FROM_DEVICE, content, 2); } return USB_STOR_TRANSPORT_GOOD; } static int sddr55_reset(struct us_data *us) { return 0; } static unsigned long sddr55_get_capacity(struct us_data *us) { unsigned char manufacturerID; unsigned char deviceID; int result; struct sddr55_card_info *info = (struct sddr55_card_info *)us->extra; usb_stor_dbg(us, "Reading capacity...\n"); result = sddr55_read_deviceID(us, &manufacturerID, &deviceID); usb_stor_dbg(us, "Result of read_deviceID is %d\n", result); if (result != USB_STOR_XFER_GOOD) return 0; usb_stor_dbg(us, "Device ID = %02X\n", deviceID); usb_stor_dbg(us, "Manuf ID = %02X\n", manufacturerID); info->pageshift = 9; info->smallpageshift = 0; info->blocksize = 16; info->blockshift = 4; info->blockmask = 15; switch (deviceID) { case 0x6e: // 1MB case 0xe8: case 0xec: info->pageshift = 8; info->smallpageshift = 1; return 0x00100000; case 0xea: // 2MB case 0x64: info->pageshift = 8; info->smallpageshift = 1; fallthrough; case 0x5d: // 5d is a ROM card with pagesize 512. return 0x00200000; case 0xe3: // 4MB case 0xe5: case 0x6b: case 0xd5: return 0x00400000; case 0xe6: // 8MB case 0xd6: return 0x00800000; case 0x73: // 16MB info->blocksize = 32; info->blockshift = 5; info->blockmask = 31; return 0x01000000; case 0x75: // 32MB info->blocksize = 32; info->blockshift = 5; info->blockmask = 31; return 0x02000000; case 0x76: // 64MB info->blocksize = 32; info->blockshift = 5; info->blockmask = 31; return 0x04000000; case 0x79: // 128MB info->blocksize = 32; info->blockshift = 5; info->blockmask = 31; return 0x08000000; default: // unknown return 0; } } static int sddr55_read_map(struct us_data *us) { struct sddr55_card_info *info = (struct sddr55_card_info *)(us->extra); int numblocks; unsigned char *buffer; unsigned char *command = us->iobuf; int i; unsigned short lba; unsigned short max_lba; int result; if (!info->capacity) return -1; numblocks = info->capacity >> (info->blockshift + info->pageshift); buffer = kmalloc_array(numblocks, 2, GFP_NOIO ); if (!buffer) return -1; memset(command, 0, 8); command[5] = 0xB0; command[6] = numblocks * 2 / 256; command[7] = 0x8A; result = sddr55_bulk_transport(us, DMA_TO_DEVICE, command, 8); if ( result != USB_STOR_XFER_GOOD) { kfree (buffer); return -1; } result = sddr55_bulk_transport(us, DMA_FROM_DEVICE, buffer, numblocks * 2); if ( result != USB_STOR_XFER_GOOD) { kfree (buffer); return -1; } result = sddr55_bulk_transport(us, DMA_FROM_DEVICE, command, 2); if ( result != USB_STOR_XFER_GOOD) { kfree (buffer); return -1; } kfree(info->lba_to_pba); kfree(info->pba_to_lba); info->lba_to_pba = kmalloc_array(numblocks, sizeof(int), GFP_NOIO); info->pba_to_lba = kmalloc_array(numblocks, sizeof(int), GFP_NOIO); if (info->lba_to_pba == NULL || info->pba_to_lba == NULL) { kfree(info->lba_to_pba); kfree(info->pba_to_lba); info->lba_to_pba = NULL; info->pba_to_lba = NULL; kfree(buffer); return -1; } memset(info->lba_to_pba, 0xff, numblocks*sizeof(int)); memset(info->pba_to_lba, 0xff, numblocks*sizeof(int)); /* set maximum lba */ max_lba = info->max_log_blks; if (max_lba > 1000) max_lba = 1000; /* * Each block is 64 bytes of control data, so block i is located in * scatterlist block i*64/128k = i*(2^6)*(2^-17) = i*(2^-11) */ for (i=0; i<numblocks; i++) { int zone = i / 1024; lba = short_pack(buffer[i * 2], buffer[i * 2 + 1]); /* * Every 1024 physical blocks ("zone"), the LBA numbers * go back to zero, but are within a higher * block of LBA's. Also, there is a maximum of * 1000 LBA's per zone. In other words, in PBA * 1024-2047 you will find LBA 0-999 which are * really LBA 1000-1999. Yes, this wastes 24 * physical blocks per zone. Go figure. * These devices can have blocks go bad, so there * are 24 spare blocks to use when blocks do go bad. */ /* * SDDR55 returns 0xffff for a bad block, and 0x400 for the * CIS block. (Is this true for cards 8MB or less??) * Record these in the physical to logical map */ info->pba_to_lba[i] = lba; if (lba >= max_lba) { continue; } if (info->lba_to_pba[lba + zone * 1000] != NOT_ALLOCATED && !info->force_read_only) { printk(KERN_WARNING "sddr55: map inconsistency at LBA %04X\n", lba + zone * 1000); info->force_read_only = 1; } if (lba<0x10 || (lba>=0x3E0 && lba<0x3EF)) usb_stor_dbg(us, "LBA %04X <-> PBA %04X\n", lba, i); info->lba_to_pba[lba + zone * 1000] = i; } kfree(buffer); return 0; } static void sddr55_card_info_destructor(void *extra) { struct sddr55_card_info *info = (struct sddr55_card_info *)extra; if (!extra) return; kfree(info->lba_to_pba); kfree(info->pba_to_lba); } /* * Transport for the Sandisk SDDR-55 */ static int sddr55_transport(struct scsi_cmnd *srb, struct us_data *us) { int result; static const unsigned char inquiry_response[8] = { 0x00, 0x80, 0x00, 0x02, 0x1F, 0x00, 0x00, 0x00 }; // write-protected for now, no block descriptor support static const unsigned char mode_page_01[20] = { 0x0, 0x12, 0x00, 0x80, 0x0, 0x0, 0x0, 0x0, 0x01, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; unsigned char *ptr = us->iobuf; unsigned long capacity; unsigned int lba; unsigned int pba; unsigned int page; unsigned short pages; struct sddr55_card_info *info; if (!us->extra) { us->extra = kzalloc( sizeof(struct sddr55_card_info), GFP_NOIO); if (!us->extra) return USB_STOR_TRANSPORT_ERROR; us->extra_destructor = sddr55_card_info_destructor; } info = (struct sddr55_card_info *)(us->extra); if (srb->cmnd[0] == REQUEST_SENSE) { usb_stor_dbg(us, "request sense %02x/%02x/%02x\n", info->sense_data[2], info->sense_data[12], info->sense_data[13]); memcpy (ptr, info->sense_data, sizeof info->sense_data); ptr[0] = 0x70; ptr[7] = 11; usb_stor_set_xfer_buf (ptr, sizeof info->sense_data, srb); memset (info->sense_data, 0, sizeof info->sense_data); return USB_STOR_TRANSPORT_GOOD; } memset (info->sense_data, 0, sizeof info->sense_data); /* * Dummy up a response for INQUIRY since SDDR55 doesn't * respond to INQUIRY commands */ if (srb->cmnd[0] == INQUIRY) { memcpy(ptr, inquiry_response, 8); fill_inquiry_response(us, ptr, 36); return USB_STOR_TRANSPORT_GOOD; } /* * only check card status if the map isn't allocated, ie no card seen yet * or if it's been over half a second since we last accessed it */ if (info->lba_to_pba == NULL || time_after(jiffies, info->last_access + HZ/2)) { /* check to see if a card is fitted */ result = sddr55_status (us); if (result) { result = sddr55_status (us); if (!result) { set_sense_info (6, 0x28, 0); /* new media, set unit attention, not ready to ready */ } return USB_STOR_TRANSPORT_FAILED; } } /* * if we detected a problem with the map when writing, * don't allow any more access */ if (info->fatal_error) { set_sense_info (3, 0x31, 0); return USB_STOR_TRANSPORT_FAILED; } if (srb->cmnd[0] == READ_CAPACITY) { capacity = sddr55_get_capacity(us); if (!capacity) { set_sense_info (3, 0x30, 0); /* incompatible medium */ return USB_STOR_TRANSPORT_FAILED; } info->capacity = capacity; /* * figure out the maximum logical block number, allowing for * the fact that only 250 out of every 256 are used */ info->max_log_blks = ((info->capacity >> (info->pageshift + info->blockshift)) / 256) * 250; /* * Last page in the card, adjust as we only use 250 out of * every 256 pages */ capacity = (capacity / 256) * 250; capacity /= PAGESIZE; capacity--; ((__be32 *) ptr)[0] = cpu_to_be32(capacity); ((__be32 *) ptr)[1] = cpu_to_be32(PAGESIZE); usb_stor_set_xfer_buf(ptr, 8, srb); sddr55_read_map(us); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == MODE_SENSE_10) { memcpy(ptr, mode_page_01, sizeof mode_page_01); ptr[3] = (info->read_only || info->force_read_only) ? 0x80 : 0; usb_stor_set_xfer_buf(ptr, sizeof(mode_page_01), srb); if ( (srb->cmnd[2] & 0x3F) == 0x01 ) { usb_stor_dbg(us, "Dummy up request for mode page 1\n"); return USB_STOR_TRANSPORT_GOOD; } else if ( (srb->cmnd[2] & 0x3F) == 0x3F ) { usb_stor_dbg(us, "Dummy up request for all mode pages\n"); return USB_STOR_TRANSPORT_GOOD; } set_sense_info (5, 0x24, 0); /* invalid field in command */ return USB_STOR_TRANSPORT_FAILED; } if (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL) { usb_stor_dbg(us, "%s medium removal. Not that I can do anything about it...\n", (srb->cmnd[4]&0x03) ? "Prevent" : "Allow"); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == READ_10 || srb->cmnd[0] == WRITE_10) { page = short_pack(srb->cmnd[3], srb->cmnd[2]); page <<= 16; page |= short_pack(srb->cmnd[5], srb->cmnd[4]); pages = short_pack(srb->cmnd[8], srb->cmnd[7]); page <<= info->smallpageshift; // convert page to block and page-within-block lba = page >> info->blockshift; page = page & info->blockmask; // locate physical block corresponding to logical block if (lba >= info->max_log_blks) { usb_stor_dbg(us, "Error: Requested LBA %04X exceeds maximum block %04X\n", lba, info->max_log_blks - 1); set_sense_info (5, 0x24, 0); /* invalid field in command */ return USB_STOR_TRANSPORT_FAILED; } pba = info->lba_to_pba[lba]; if (srb->cmnd[0] == WRITE_10) { usb_stor_dbg(us, "WRITE_10: write block %04X (LBA %04X) page %01X pages %d\n", pba, lba, page, pages); return sddr55_write_data(us, lba, page, pages); } else { usb_stor_dbg(us, "READ_10: read block %04X (LBA %04X) page %01X pages %d\n", pba, lba, page, pages); return sddr55_read_data(us, lba, page, pages); } } if (srb->cmnd[0] == TEST_UNIT_READY) { return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == START_STOP) { return USB_STOR_TRANSPORT_GOOD; } set_sense_info (5, 0x20, 0); /* illegal command */ return USB_STOR_TRANSPORT_FAILED; // FIXME: sense buffer? } static struct scsi_host_template sddr55_host_template; static int sddr55_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct us_data *us; int result; result = usb_stor_probe1(&us, intf, id, (id - sddr55_usb_ids) + sddr55_unusual_dev_list, &sddr55_host_template); if (result) return result; us->transport_name = "SDDR55"; us->transport = sddr55_transport; us->transport_reset = sddr55_reset; us->max_lun = 0; result = usb_stor_probe2(us); return result; } static struct usb_driver sddr55_driver = { .name = DRV_NAME, .probe = sddr55_probe, .disconnect = usb_stor_disconnect, .suspend = usb_stor_suspend, .resume = usb_stor_resume, .reset_resume = usb_stor_reset_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = sddr55_usb_ids, .soft_unbind = 1, .no_dynamic_id = 1, }; module_usb_stor_driver(sddr55_driver, sddr55_host_template, DRV_NAME);
1658 900 2 443 273 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 /* SPDX-License-Identifier: GPL-2.0+ */ #ifndef _LINUX_MAPLE_TREE_H #define _LINUX_MAPLE_TREE_H /* * Maple Tree - An RCU-safe adaptive tree for storing ranges * Copyright (c) 2018-2022 Oracle * Authors: Liam R. Howlett <Liam.Howlett@Oracle.com> * Matthew Wilcox <willy@infradead.org> */ #include <linux/kernel.h> #include <linux/rcupdate.h> #include <linux/spinlock.h> /* #define CONFIG_MAPLE_RCU_DISABLED */ /* * Allocated nodes are mutable until they have been inserted into the tree, * at which time they cannot change their type until they have been removed * from the tree and an RCU grace period has passed. * * Removed nodes have their ->parent set to point to themselves. RCU readers * check ->parent before relying on the value that they loaded from the * slots array. This lets us reuse the slots array for the RCU head. * * Nodes in the tree point to their parent unless bit 0 is set. */ #if defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) /* 64bit sizes */ #define MAPLE_NODE_SLOTS 31 /* 256 bytes including ->parent */ #define MAPLE_RANGE64_SLOTS 16 /* 256 bytes */ #define MAPLE_ARANGE64_SLOTS 10 /* 240 bytes */ #define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 1) #else /* 32bit sizes */ #define MAPLE_NODE_SLOTS 63 /* 256 bytes including ->parent */ #define MAPLE_RANGE64_SLOTS 32 /* 256 bytes */ #define MAPLE_ARANGE64_SLOTS 21 /* 240 bytes */ #define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 2) #endif /* defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) */ #define MAPLE_NODE_MASK 255UL /* * The node->parent of the root node has bit 0 set and the rest of the pointer * is a pointer to the tree itself. No more bits are available in this pointer * (on m68k, the data structure may only be 2-byte aligned). * * Internal non-root nodes can only have maple_range_* nodes as parents. The * parent pointer is 256B aligned like all other tree nodes. When storing a 32 * or 64 bit values, the offset can fit into 4 bits. The 16 bit values need an * extra bit to store the offset. This extra bit comes from a reuse of the last * bit in the node type. This is possible by using bit 1 to indicate if bit 2 * is part of the type or the slot. * * Once the type is decided, the decision of an allocation range type or a * range type is done by examining the immutable tree flag for the * MT_FLAGS_ALLOC_RANGE flag. * * Node types: * 0b??1 = Root * 0b?00 = 16 bit nodes * 0b010 = 32 bit nodes * 0b110 = 64 bit nodes * * Slot size and location in the parent pointer: * type : slot location * 0b??1 : Root * 0b?00 : 16 bit values, type in 0-1, slot in 2-6 * 0b010 : 32 bit values, type in 0-2, slot in 3-6 * 0b110 : 64 bit values, type in 0-2, slot in 3-6 */ /* * This metadata is used to optimize the gap updating code and in reverse * searching for gaps or any other code that needs to find the end of the data. */ struct maple_metadata { unsigned char end; /* end of data */ unsigned char gap; /* offset of largest gap */ }; /* * Leaf nodes do not store pointers to nodes, they store user data. Users may * store almost any bit pattern. As noted above, the optimisation of storing an * entry at 0 in the root pointer cannot be done for data which have the bottom * two bits set to '10'. We also reserve values with the bottom two bits set to * '10' which are below 4096 (ie 2, 6, 10 .. 4094) for internal use. Some APIs * return errnos as a negative errno shifted right by two bits and the bottom * two bits set to '10', and while choosing to store these values in the array * is not an error, it may lead to confusion if you're testing for an error with * mas_is_err(). * * Non-leaf nodes store the type of the node pointed to (enum maple_type in bits * 3-6), bit 2 is reserved. That leaves bits 0-1 unused for now. * * In regular B-Tree terms, pivots are called keys. The term pivot is used to * indicate that the tree is specifying ranges, Pivots may appear in the * subtree with an entry attached to the value whereas keys are unique to a * specific position of a B-tree. Pivot values are inclusive of the slot with * the same index. */ struct maple_range_64 { struct maple_pnode *parent; unsigned long pivot[MAPLE_RANGE64_SLOTS - 1]; union { void __rcu *slot[MAPLE_RANGE64_SLOTS]; struct { void __rcu *pad[MAPLE_RANGE64_SLOTS - 1]; struct maple_metadata meta; }; }; }; /* * At tree creation time, the user can specify that they're willing to trade off * storing fewer entries in a tree in return for storing more information in * each node. * * The maple tree supports recording the largest range of NULL entries available * in this node, also called gaps. This optimises the tree for allocating a * range. */ struct maple_arange_64 { struct maple_pnode *parent; unsigned long pivot[MAPLE_ARANGE64_SLOTS - 1]; void __rcu *slot[MAPLE_ARANGE64_SLOTS]; unsigned long gap[MAPLE_ARANGE64_SLOTS]; struct maple_metadata meta; }; struct maple_alloc { unsigned long total; unsigned char node_count; unsigned int request_count; struct maple_alloc *slot[MAPLE_ALLOC_SLOTS]; }; struct maple_topiary { struct maple_pnode *parent; struct maple_enode *next; /* Overlaps the pivot */ }; enum maple_type { maple_dense, maple_leaf_64, maple_range_64, maple_arange_64, }; enum store_type { wr_invalid, wr_new_root, wr_store_root, wr_exact_fit, wr_spanning_store, wr_split_store, wr_rebalance, wr_append, wr_node_store, wr_slot_store, }; /** * DOC: Maple tree flags * * * MT_FLAGS_ALLOC_RANGE - Track gaps in this tree * * MT_FLAGS_USE_RCU - Operate in RCU mode * * MT_FLAGS_HEIGHT_OFFSET - The position of the tree height in the flags * * MT_FLAGS_HEIGHT_MASK - The mask for the maple tree height value * * MT_FLAGS_LOCK_MASK - How the mt_lock is used * * MT_FLAGS_LOCK_IRQ - Acquired irq-safe * * MT_FLAGS_LOCK_BH - Acquired bh-safe * * MT_FLAGS_LOCK_EXTERN - mt_lock is not used * * MAPLE_HEIGHT_MAX The largest height that can be stored */ #define MT_FLAGS_ALLOC_RANGE 0x01 #define MT_FLAGS_USE_RCU 0x02 #define MT_FLAGS_HEIGHT_OFFSET 0x02 #define MT_FLAGS_HEIGHT_MASK 0x7C #define MT_FLAGS_LOCK_MASK 0x300 #define MT_FLAGS_LOCK_IRQ 0x100 #define MT_FLAGS_LOCK_BH 0x200 #define MT_FLAGS_LOCK_EXTERN 0x300 #define MT_FLAGS_ALLOC_WRAPPED 0x0800 #define MAPLE_HEIGHT_MAX 31 #define MAPLE_NODE_TYPE_MASK 0x0F #define MAPLE_NODE_TYPE_SHIFT 0x03 #define MAPLE_RESERVED_RANGE 4096 #ifdef CONFIG_LOCKDEP #define mt_lock_is_held(mt) \ (!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock)) #define mt_write_lock_is_held(mt) \ (!(mt)->ma_external_lock || \ lock_is_held_type((mt)->ma_external_lock, 0)) #define mt_set_external_lock(mt, lock) \ (mt)->ma_external_lock = &(lock)->dep_map #define mt_on_stack(mt) (mt).ma_external_lock = NULL #else #define mt_lock_is_held(mt) 1 #define mt_write_lock_is_held(mt) 1 #define mt_set_external_lock(mt, lock) do { } while (0) #define mt_on_stack(mt) do { } while (0) #endif /* * If the tree contains a single entry at index 0, it is usually stored in * tree->ma_root. To optimise for the page cache, an entry which ends in '00', * '01' or '11' is stored in the root, but an entry which ends in '10' will be * stored in a node. Bits 3-6 are used to store enum maple_type. * * The flags are used both to store some immutable information about this tree * (set at tree creation time) and dynamic information set under the spinlock. * * Another use of flags are to indicate global states of the tree. This is the * case with the MT_FLAGS_USE_RCU flag, which indicates the tree is currently in * RCU mode. This mode was added to allow the tree to reuse nodes instead of * re-allocating and RCU freeing nodes when there is a single user. */ struct maple_tree { union { spinlock_t ma_lock; #ifdef CONFIG_LOCKDEP struct lockdep_map *ma_external_lock; #endif }; unsigned int ma_flags; void __rcu *ma_root; }; /** * MTREE_INIT() - Initialize a maple tree * @name: The maple tree name * @__flags: The maple tree flags * */ #define MTREE_INIT(name, __flags) { \ .ma_lock = __SPIN_LOCK_UNLOCKED((name).ma_lock), \ .ma_flags = __flags, \ .ma_root = NULL, \ } /** * MTREE_INIT_EXT() - Initialize a maple tree with an external lock. * @name: The tree name * @__flags: The maple tree flags * @__lock: The external lock */ #ifdef CONFIG_LOCKDEP #define MTREE_INIT_EXT(name, __flags, __lock) { \ .ma_external_lock = &(__lock).dep_map, \ .ma_flags = (__flags), \ .ma_root = NULL, \ } #else #define MTREE_INIT_EXT(name, __flags, __lock) MTREE_INIT(name, __flags) #endif #define DEFINE_MTREE(name) \ struct maple_tree name = MTREE_INIT(name, 0) #define mtree_lock(mt) spin_lock((&(mt)->ma_lock)) #define mtree_lock_nested(mas, subclass) \ spin_lock_nested((&(mt)->ma_lock), subclass) #define mtree_unlock(mt) spin_unlock((&(mt)->ma_lock)) /* * The Maple Tree squeezes various bits in at various points which aren't * necessarily obvious. Usually, this is done by observing that pointers are * N-byte aligned and thus the bottom log_2(N) bits are available for use. We * don't use the high bits of pointers to store additional information because * we don't know what bits are unused on any given architecture. * * Nodes are 256 bytes in size and are also aligned to 256 bytes, giving us 8 * low bits for our own purposes. Nodes are currently of 4 types: * 1. Single pointer (Range is 0-0) * 2. Non-leaf Allocation Range nodes * 3. Non-leaf Range nodes * 4. Leaf Range nodes All nodes consist of a number of node slots, * pivots, and a parent pointer. */ struct maple_node { union { struct { struct maple_pnode *parent; void __rcu *slot[MAPLE_NODE_SLOTS]; }; struct { void *pad; struct rcu_head rcu; struct maple_enode *piv_parent; unsigned char parent_slot; enum maple_type type; unsigned char slot_len; unsigned int ma_flags; }; struct maple_range_64 mr64; struct maple_arange_64 ma64; struct maple_alloc alloc; }; }; /* * More complicated stores can cause two nodes to become one or three and * potentially alter the height of the tree. Either half of the tree may need * to be rebalanced against the other. The ma_topiary struct is used to track * which nodes have been 'cut' from the tree so that the change can be done * safely at a later date. This is done to support RCU. */ struct ma_topiary { struct maple_enode *head; struct maple_enode *tail; struct maple_tree *mtree; }; void *mtree_load(struct maple_tree *mt, unsigned long index); int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, gfp_t gfp); int mtree_insert_range(struct maple_tree *mt, unsigned long first, unsigned long last, void *entry, gfp_t gfp); int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long range_lo, unsigned long range_hi, unsigned long *next, gfp_t gfp); int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); int mtree_store_range(struct maple_tree *mt, unsigned long first, unsigned long last, void *entry, gfp_t gfp); int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, gfp_t gfp); void *mtree_erase(struct maple_tree *mt, unsigned long index); int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp); int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp); void mtree_destroy(struct maple_tree *mt); void __mt_destroy(struct maple_tree *mt); /** * mtree_empty() - Determine if a tree has any present entries. * @mt: Maple Tree. * * Context: Any context. * Return: %true if the tree contains only NULL pointers. */ static inline bool mtree_empty(const struct maple_tree *mt) { return mt->ma_root == NULL; } /* Advanced API */ /* * Maple State Status * ma_active means the maple state is pointing to a node and offset and can * continue operating on the tree. * ma_start means we have not searched the tree. * ma_root means we have searched the tree and the entry we found lives in * the root of the tree (ie it has index 0, length 1 and is the only entry in * the tree). * ma_none means we have searched the tree and there is no node in the * tree for this entry. For example, we searched for index 1 in an empty * tree. Or we have a tree which points to a full leaf node and we * searched for an entry which is larger than can be contained in that * leaf node. * ma_pause means the data within the maple state may be stale, restart the * operation * ma_overflow means the search has reached the upper limit of the search * ma_underflow means the search has reached the lower limit of the search * ma_error means there was an error, check the node for the error number. */ enum maple_status { ma_active, ma_start, ma_root, ma_none, ma_pause, ma_overflow, ma_underflow, ma_error, }; /* * The maple state is defined in the struct ma_state and is used to keep track * of information during operations, and even between operations when using the * advanced API. * * If state->node has bit 0 set then it references a tree location which is not * a node (eg the root). If bit 1 is set, the rest of the bits are a negative * errno. Bit 2 (the 'unallocated slots' bit) is clear. Bits 3-6 indicate the * node type. * * state->alloc either has a request number of nodes or an allocated node. If * stat->alloc has a requested number of nodes, the first bit will be set (0x1) * and the remaining bits are the value. If state->alloc is a node, then the * node will be of type maple_alloc. maple_alloc has MAPLE_NODE_SLOTS - 1 for * storing more allocated nodes, a total number of nodes allocated, and the * node_count in this node. node_count is the number of allocated nodes in this * node. The scaling beyond MAPLE_NODE_SLOTS - 1 is handled by storing further * nodes into state->alloc->slot[0]'s node. Nodes are taken from state->alloc * by removing a node from the state->alloc node until state->alloc->node_count * is 1, when state->alloc is returned and the state->alloc->slot[0] is promoted * to state->alloc. Nodes are pushed onto state->alloc by putting the current * state->alloc into the pushed node's slot[0]. * * The state also contains the implied min/max of the state->node, the depth of * this search, and the offset. The implied min/max are either from the parent * node or are 0-oo for the root node. The depth is incremented or decremented * every time a node is walked down or up. The offset is the slot/pivot of * interest in the node - either for reading or writing. * * When returning a value the maple state index and last respectively contain * the start and end of the range for the entry. Ranges are inclusive in the * Maple Tree. * * The status of the state is used to determine how the next action should treat * the state. For instance, if the status is ma_start then the next action * should start at the root of the tree and walk down. If the status is * ma_pause then the node may be stale data and should be discarded. If the * status is ma_overflow, then the last action hit the upper limit. * */ struct ma_state { struct maple_tree *tree; /* The tree we're operating in */ unsigned long index; /* The index we're operating on - range start */ unsigned long last; /* The last index we're operating on - range end */ struct maple_enode *node; /* The node containing this entry */ unsigned long min; /* The minimum index of this node - implied pivot min */ unsigned long max; /* The maximum index of this node - implied pivot max */ struct slab_sheaf *sheaf; /* Allocated nodes for this operation */ struct maple_node *alloc; /* A single allocated node for fast path writes */ unsigned long node_request; /* The number of nodes to allocate for this operation */ enum maple_status status; /* The status of the state (active, start, none, etc) */ unsigned char depth; /* depth of tree descent during write */ unsigned char offset; unsigned char mas_flags; unsigned char end; /* The end of the node */ enum store_type store_type; /* The type of store needed for this operation */ }; struct ma_wr_state { struct ma_state *mas; struct maple_node *node; /* Decoded mas->node */ unsigned long r_min; /* range min */ unsigned long r_max; /* range max */ enum maple_type type; /* mas->node type */ unsigned char offset_end; /* The offset where the write ends */ unsigned long *pivots; /* mas->node->pivots pointer */ unsigned long end_piv; /* The pivot at the offset end */ void __rcu **slots; /* mas->node->slots pointer */ void *entry; /* The entry to write */ void *content; /* The existing entry that is being overwritten */ unsigned char vacant_height; /* Height of lowest node with free space */ unsigned char sufficient_height;/* Height of lowest node with min sufficiency + 1 nodes */ }; #define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock)) #define mas_lock_nested(mas, subclass) \ spin_lock_nested(&((mas)->tree->ma_lock), subclass) #define mas_unlock(mas) spin_unlock(&((mas)->tree->ma_lock)) /* * Special values for ma_state.node. * MA_ERROR represents an errno. After dropping the lock and attempting * to resolve the error, the walk would have to be restarted from the * top of the tree as the tree may have been modified. */ #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) /* * When changing MA_STATE, remember to also change rust/kernel/maple_tree.rs */ #define MA_STATE(name, mt, first, end) \ struct ma_state name = { \ .tree = mt, \ .index = first, \ .last = end, \ .node = NULL, \ .status = ma_start, \ .min = 0, \ .max = ULONG_MAX, \ .sheaf = NULL, \ .alloc = NULL, \ .node_request = 0, \ .mas_flags = 0, \ .store_type = wr_invalid, \ } #define MA_WR_STATE(name, ma_state, wr_entry) \ struct ma_wr_state name = { \ .mas = ma_state, \ .content = NULL, \ .entry = wr_entry, \ .vacant_height = 0, \ .sufficient_height = 0 \ } #define MA_TOPIARY(name, tree) \ struct ma_topiary name = { \ .head = NULL, \ .tail = NULL, \ .mtree = tree, \ } void *mas_walk(struct ma_state *mas); void *mas_store(struct ma_state *mas, void *entry); void *mas_erase(struct ma_state *mas); int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp); void mas_store_prealloc(struct ma_state *mas, void *entry); void *mas_find(struct ma_state *mas, unsigned long max); void *mas_find_range(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); void *mas_find_range_rev(struct ma_state *mas, unsigned long max); int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, void *entry, unsigned long range_lo, unsigned long range_hi, unsigned long *next, gfp_t gfp); bool mas_nomem(struct ma_state *mas, gfp_t gfp); void mas_pause(struct ma_state *mas); void maple_tree_init(void); void mas_destroy(struct ma_state *mas); int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries); void *mas_prev(struct ma_state *mas, unsigned long min); void *mas_prev_range(struct ma_state *mas, unsigned long max); void *mas_next(struct ma_state *mas, unsigned long max); void *mas_next_range(struct ma_state *mas, unsigned long max); int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size); /* * This finds an empty area from the highest address to the lowest. * AKA "Topdown" version, */ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size); static inline void mas_init(struct ma_state *mas, struct maple_tree *tree, unsigned long addr) { memset(mas, 0, sizeof(struct ma_state)); mas->tree = tree; mas->index = mas->last = addr; mas->max = ULONG_MAX; mas->status = ma_start; mas->node = NULL; } static inline bool mas_is_active(struct ma_state *mas) { return mas->status == ma_active; } static inline bool mas_is_err(struct ma_state *mas) { return mas->status == ma_error; } /** * mas_reset() - Reset a Maple Tree operation state. * @mas: Maple Tree operation state. * * Resets the error or walk state of the @mas so future walks of the * array will start from the root. Use this if you have dropped the * lock and want to reuse the ma_state. * * Context: Any context. */ static __always_inline void mas_reset(struct ma_state *mas) { mas->status = ma_start; mas->node = NULL; } /** * mas_for_each() - Iterate over a range of the maple tree. * @__mas: Maple Tree operation state (maple_state) * @__entry: Entry retrieved from the tree * @__max: maximum index to retrieve from the tree * * When returned, mas->index and mas->last will hold the entire range for the * entry. * * Note: may return the zero entry. */ #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) /** * mas_for_each_rev() - Iterate over a range of the maple tree in reverse order. * @__mas: Maple Tree operation state (maple_state) * @__entry: Entry retrieved from the tree * @__min: minimum index to retrieve from the tree * * When returned, mas->index and mas->last will hold the entire range for the * entry. * * Note: may return the zero entry. */ #define mas_for_each_rev(__mas, __entry, __min) \ while (((__entry) = mas_find_rev((__mas), (__min))) != NULL) #ifdef CONFIG_DEBUG_MAPLE_TREE enum mt_dump_format { mt_dump_dec, mt_dump_hex, }; extern atomic_t maple_tree_tests_run; extern atomic_t maple_tree_tests_passed; void mt_dump(const struct maple_tree *mt, enum mt_dump_format format); void mas_dump(const struct ma_state *mas); void mas_wr_dump(const struct ma_wr_state *wr_mas); void mt_validate(struct maple_tree *mt); void mt_cache_shrink(void); #define MT_BUG_ON(__tree, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ pr_info("BUG at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mt_dump(__tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ } while (0) #define MAS_BUG_ON(__mas, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ pr_info("BUG at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_dump(__mas); \ mt_dump((__mas)->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ } while (0) #define MAS_WR_BUG_ON(__wrmas, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ pr_info("BUG at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_wr_dump(__wrmas); \ mas_dump((__wrmas)->mas); \ mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ } while (0) #define MT_WARN_ON(__tree, __x) ({ \ int ret = !!(__x); \ atomic_inc(&maple_tree_tests_run); \ if (ret) { \ pr_info("WARN at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mt_dump(__tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ unlikely(ret); \ }) #define MAS_WARN_ON(__mas, __x) ({ \ int ret = !!(__x); \ atomic_inc(&maple_tree_tests_run); \ if (ret) { \ pr_info("WARN at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_dump(__mas); \ mt_dump((__mas)->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ unlikely(ret); \ }) #define MAS_WR_WARN_ON(__wrmas, __x) ({ \ int ret = !!(__x); \ atomic_inc(&maple_tree_tests_run); \ if (ret) { \ pr_info("WARN at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_wr_dump(__wrmas); \ mas_dump((__wrmas)->mas); \ mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ unlikely(ret); \ }) #else #define MT_BUG_ON(__tree, __x) BUG_ON(__x) #define MAS_BUG_ON(__mas, __x) BUG_ON(__x) #define MAS_WR_BUG_ON(__mas, __x) BUG_ON(__x) #define MT_WARN_ON(__tree, __x) WARN_ON(__x) #define MAS_WARN_ON(__mas, __x) WARN_ON(__x) #define MAS_WR_WARN_ON(__mas, __x) WARN_ON(__x) #endif /* CONFIG_DEBUG_MAPLE_TREE */ /** * __mas_set_range() - Set up Maple Tree operation state to a sub-range of the * current location. * @mas: Maple Tree operation state. * @start: New start of range in the Maple Tree. * @last: New end of range in the Maple Tree. * * set the internal maple state values to a sub-range. * Please use mas_set_range() if you do not know where you are in the tree. */ static inline void __mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { /* Ensure the range starts within the current slot */ MAS_WARN_ON(mas, mas_is_active(mas) && (mas->index > start || mas->last < start)); mas->index = start; mas->last = last; } /** * mas_set_range() - Set up Maple Tree operation state for a different index. * @mas: Maple Tree operation state. * @start: New start of range in the Maple Tree. * @last: New end of range in the Maple Tree. * * Move the operation state to refer to a different range. This will * have the effect of starting a walk from the top; see mas_next() * to move to an adjacent index. */ static inline void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { mas_reset(mas); __mas_set_range(mas, start, last); } /** * mas_set() - Set up Maple Tree operation state for a different index. * @mas: Maple Tree operation state. * @index: New index into the Maple Tree. * * Move the operation state to refer to a different index. This will * have the effect of starting a walk from the top; see mas_next() * to move to an adjacent index. */ static inline void mas_set(struct ma_state *mas, unsigned long index) { mas_set_range(mas, index, index); } static inline bool mt_external_lock(const struct maple_tree *mt) { return (mt->ma_flags & MT_FLAGS_LOCK_MASK) == MT_FLAGS_LOCK_EXTERN; } /** * mt_init_flags() - Initialise an empty maple tree with flags. * @mt: Maple Tree * @flags: maple tree flags. * * If you need to initialise a Maple Tree with special flags (eg, an * allocation tree), use this function. * * Context: Any context. */ static inline void mt_init_flags(struct maple_tree *mt, unsigned int flags) { mt->ma_flags = flags; if (!mt_external_lock(mt)) spin_lock_init(&mt->ma_lock); rcu_assign_pointer(mt->ma_root, NULL); } /** * mt_init() - Initialise an empty maple tree. * @mt: Maple Tree * * An empty Maple Tree. * * Context: Any context. */ static inline void mt_init(struct maple_tree *mt) { mt_init_flags(mt, 0); } static inline bool mt_in_rcu(struct maple_tree *mt) { #ifdef CONFIG_MAPLE_RCU_DISABLED return false; #endif return mt->ma_flags & MT_FLAGS_USE_RCU; } /** * mt_clear_in_rcu() - Switch the tree to non-RCU mode. * @mt: The Maple Tree */ static inline void mt_clear_in_rcu(struct maple_tree *mt) { if (!mt_in_rcu(mt)) return; if (mt_external_lock(mt)) { WARN_ON(!mt_lock_is_held(mt)); mt->ma_flags &= ~MT_FLAGS_USE_RCU; } else { mtree_lock(mt); mt->ma_flags &= ~MT_FLAGS_USE_RCU; mtree_unlock(mt); } } /** * mt_set_in_rcu() - Switch the tree to RCU safe mode. * @mt: The Maple Tree */ static inline void mt_set_in_rcu(struct maple_tree *mt) { if (mt_in_rcu(mt)) return; if (mt_external_lock(mt)) { WARN_ON(!mt_lock_is_held(mt)); mt->ma_flags |= MT_FLAGS_USE_RCU; } else { mtree_lock(mt); mt->ma_flags |= MT_FLAGS_USE_RCU; mtree_unlock(mt); } } static inline unsigned int mt_height(const struct maple_tree *mt) { return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET; } void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max); void *mt_find_after(struct maple_tree *mt, unsigned long *index, unsigned long max); void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min); void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max); /** * mt_for_each - Iterate over each entry starting at index until max. * @__tree: The Maple Tree * @__entry: The current entry * @__index: The index to start the search from. Subsequently used as iterator. * @__max: The maximum limit for @index * * This iterator skips all entries, which resolve to a NULL pointer, * e.g. entries which has been reserved with XA_ZERO_ENTRY. */ #define mt_for_each(__tree, __entry, __index, __max) \ for (__entry = mt_find(__tree, &(__index), __max); \ __entry; __entry = mt_find_after(__tree, &(__index), __max)) #endif /*_LINUX_MAPLE_TREE_H */
35 1 33 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2011 Florian Westphal <fw@strlen.de> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/route.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> #include <linux/netfilter/xt_rpfilter.h> #include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); MODULE_DESCRIPTION("Xtables: IPv6 reverse path filter match"); static bool rpfilter_addr_unicast(const struct in6_addr *addr) { int addr_type = ipv6_addr_type(addr); return addr_type & IPV6_ADDR_UNICAST; } static bool rpfilter_addr_linklocal(const struct in6_addr *addr) { int addr_type = ipv6_addr_type(addr); return addr_type & IPV6_ADDR_LINKLOCAL; } static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, const struct net_device *dev, u8 flags) { struct rt6_info *rt; struct ipv6hdr *iph = ipv6_hdr(skb); bool ret = false; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev), .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, .flowi6_proto = iph->nexthdr, .flowi6_uid = sock_net_uid(net, NULL), .daddr = iph->saddr, }; int lookup_flags; if (rpfilter_addr_unicast(&iph->daddr)) { memcpy(&fl6.saddr, &iph->daddr, sizeof(struct in6_addr)); lookup_flags = RT6_LOOKUP_F_HAS_SADDR; } else { lookup_flags = 0; } fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; if (rpfilter_addr_linklocal(&iph->saddr)) { lookup_flags |= RT6_LOOKUP_F_IFACE; fl6.flowi6_oif = dev->ifindex; } else if ((flags & XT_RPFILTER_LOOSE) == 0) fl6.flowi6_oif = dev->ifindex; rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags); if (rt->dst.error) goto out; if (rt->rt6i_flags & (RTF_REJECT|RTF_ANYCAST)) goto out; if (rt->rt6i_flags & RTF_LOCAL) { ret = flags & XT_RPFILTER_ACCEPT_LOCAL; goto out; } if (rt->rt6i_idev->dev == dev || l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex || (flags & XT_RPFILTER_LOOSE)) ret = true; out: ip6_rt_put(rt); return ret; } static bool rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in) { return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; } static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info = par->matchinfo; int saddrtype; struct ipv6hdr *iph; bool invert = info->flags & XT_RPFILTER_INVERT; if (rpfilter_is_loopback(skb, xt_in(par))) return true ^ invert; iph = ipv6_hdr(skb); saddrtype = ipv6_addr_type(&iph->saddr); if (unlikely(saddrtype == IPV6_ADDR_ANY)) return true ^ invert; /* not routable: forward path will drop it */ return rpfilter_lookup_reverse6(xt_net(par), skb, xt_in(par), info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) { const struct xt_rpfilter_info *info = par->matchinfo; unsigned int options = ~XT_RPFILTER_OPTION_MASK; if (info->flags & options) { pr_info_ratelimited("unknown options\n"); return -EINVAL; } if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "raw") != 0) { pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n", par->table); return -EINVAL; } return 0; } static struct xt_match rpfilter_mt_reg __read_mostly = { .name = "rpfilter", .family = NFPROTO_IPV6, .checkentry = rpfilter_check, .match = rpfilter_mt, .matchsize = sizeof(struct xt_rpfilter_info), .hooks = (1 << NF_INET_PRE_ROUTING), .me = THIS_MODULE }; static int __init rpfilter_mt_init(void) { return xt_register_match(&rpfilter_mt_reg); } static void __exit rpfilter_mt_exit(void) { xt_unregister_match(&rpfilter_mt_reg); } module_init(rpfilter_mt_init); module_exit(rpfilter_mt_exit);
30 34 30 80 80 45 42 18 4 29 32 18 10 7 2 2 64 64 4 20 73 50 26 1 32 18 32 18 37 77 75 34 34 34 45 31 45 29 45 28 1 61 17 13 25 62 61 62 61 29 29 62 60 18 7 32 32 62 32 62 61 62 31 1 14 16 21 46 16 34 32 34 5 1 4 5 1 43 1 1 41 32 12 3 1 1 1 2 4 30 4 29 28 9 28 30 26 3 2 23 20 2 2 3 2 30 29 30 30 29 1 27 29 28 29 28 28 1 28 26 26 28 27 27 27 27 30 30 32 3 30 28 2 33 30 4 1 1 36 1 32 3 34 3 33 27 1 2 1 1 1 1 2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 // SPDX-License-Identifier: GPL-2.0-only /* * Kernel-based Virtual Machine driver for Linux * cpuid support routines * * derived from arch/x86/kvm/x86.c * * Copyright 2011 Red Hat, Inc. and/or its affiliates. * Copyright IBM Corporation, 2008 */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include "linux/lockdep.h" #include <linux/export.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> #include <linux/sched/stat.h> #include <asm/processor.h> #include <asm/user.h> #include <asm/fpu/xstate.h> #include <asm/sgx.h> #include <asm/cpuid/api.h> #include "cpuid.h" #include "lapic.h" #include "mmu.h" #include "trace.h" #include "pmu.h" #include "xen.h" /* * Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be * aligned to sizeof(unsigned long) because it's not accessed via bitops. */ u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_cpu_caps); struct cpuid_xstate_sizes { u32 eax; u32 ebx; u32 ecx; }; static struct cpuid_xstate_sizes xstate_sizes[XFEATURE_MAX] __ro_after_init; void __init kvm_init_xstate_sizes(void) { u32 ign; int i; for (i = XFEATURE_YMM; i < ARRAY_SIZE(xstate_sizes); i++) { struct cpuid_xstate_sizes *xs = &xstate_sizes[i]; cpuid_count(0xD, i, &xs->eax, &xs->ebx, &xs->ecx, &ign); } } u32 xstate_required_size(u64 xstate_bv, bool compacted) { u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; int i; xstate_bv &= XFEATURE_MASK_EXTEND; for (i = XFEATURE_YMM; i < ARRAY_SIZE(xstate_sizes) && xstate_bv; i++) { struct cpuid_xstate_sizes *xs = &xstate_sizes[i]; u32 offset; if (!(xstate_bv & BIT_ULL(i))) continue; /* ECX[1]: 64B alignment in compacted form */ if (compacted) offset = (xs->ecx & 0x2) ? ALIGN(ret, 64) : ret; else offset = xs->ebx; ret = max(ret, offset + xs->eax); xstate_bv &= ~BIT_ULL(i); } return ret; } struct kvm_cpuid_entry2 *kvm_find_cpuid_entry2( struct kvm_cpuid_entry2 *entries, int nent, u32 function, u64 index) { struct kvm_cpuid_entry2 *e; int i; /* * KVM has a semi-arbitrary rule that querying the guest's CPUID model * with IRQs disabled is disallowed. The CPUID model can legitimately * have over one hundred entries, i.e. the lookup is slow, and IRQs are * typically disabled in KVM only when KVM is in a performance critical * path, e.g. the core VM-Enter/VM-Exit run loop. Nothing will break * if this rule is violated, this assertion is purely to flag potential * performance issues. If this fires, consider moving the lookup out * of the hotpath, e.g. by caching information during CPUID updates. */ lockdep_assert_irqs_enabled(); for (i = 0; i < nent; i++) { e = &entries[i]; if (e->function != function) continue; /* * If the index isn't significant, use the first entry with a * matching function. It's userspace's responsibility to not * provide "duplicate" entries in all cases. */ if (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index) return e; /* * Similarly, use the first matching entry if KVM is doing a * lookup (as opposed to emulating CPUID) for a function that's * architecturally defined as not having a significant index. */ if (index == KVM_CPUID_INDEX_NOT_SIGNIFICANT) { /* * Direct lookups from KVM should not diverge from what * KVM defines internally (the architectural behavior). */ WARN_ON_ONCE(cpuid_function_is_indexed(function)); return e; } } return NULL; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_find_cpuid_entry2); static int kvm_check_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; u64 xfeatures; /* * The existing code assumes virtual address is 48-bit or 57-bit in the * canonical address checks; exit if it is ever changed. */ best = kvm_find_cpuid_entry(vcpu, 0x80000008); if (best) { int vaddr_bits = (best->eax & 0xff00) >> 8; if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0) return -EINVAL; } /* * Exposing dynamic xfeatures to the guest requires additional * enabling in the FPU, e.g. to expand the guest XSAVE state size. */ best = kvm_find_cpuid_entry_index(vcpu, 0xd, 0); if (!best) return 0; xfeatures = best->eax | ((u64)best->edx << 32); xfeatures &= XFEATURE_MASK_USER_DYNAMIC; if (!xfeatures) return 0; return fpu_enable_guest_xfd_features(&vcpu->arch.guest_fpu, xfeatures); } static u32 kvm_apply_cpuid_pv_features_quirk(struct kvm_vcpu *vcpu); static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); /* Check whether the supplied CPUID data is equal to what is already set for the vCPU. */ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, int nent) { struct kvm_cpuid_entry2 *orig; int i; /* * Apply runtime CPUID updates to the incoming CPUID entries to avoid * false positives due mismatches on KVM-owned feature flags. * * Note! @e2 and @nent track the _old_ CPUID entries! */ kvm_update_cpuid_runtime(vcpu); kvm_apply_cpuid_pv_features_quirk(vcpu); if (nent != vcpu->arch.cpuid_nent) return -EINVAL; for (i = 0; i < nent; i++) { orig = &vcpu->arch.cpuid_entries[i]; if (e2[i].function != orig->function || e2[i].index != orig->index || e2[i].flags != orig->flags || e2[i].eax != orig->eax || e2[i].ebx != orig->ebx || e2[i].ecx != orig->ecx || e2[i].edx != orig->edx) return -EINVAL; } return 0; } static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu, const char *sig) { struct kvm_hypervisor_cpuid cpuid = {}; struct kvm_cpuid_entry2 *entry; u32 base; for_each_possible_cpuid_base_hypervisor(base) { entry = kvm_find_cpuid_entry(vcpu, base); if (entry) { u32 signature[3]; signature[0] = entry->ebx; signature[1] = entry->ecx; signature[2] = entry->edx; if (!memcmp(signature, sig, sizeof(signature))) { cpuid.base = base; cpuid.limit = entry->eax; break; } } } return cpuid; } static u32 kvm_apply_cpuid_pv_features_quirk(struct kvm_vcpu *vcpu) { struct kvm_hypervisor_cpuid kvm_cpuid; struct kvm_cpuid_entry2 *best; kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE); if (!kvm_cpuid.base) return 0; best = kvm_find_cpuid_entry(vcpu, kvm_cpuid.base | KVM_CPUID_FEATURES); if (!best) return 0; if (kvm_hlt_in_guest(vcpu->kvm)) best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); return best->eax; } /* * Calculate guest's supported XCR0 taking into account guest CPUID data and * KVM's supported XCR0 (comprised of host's XCR0 and KVM_SUPPORTED_XCR0). */ static u64 cpuid_get_supported_xcr0(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry_index(vcpu, 0xd, 0); if (!best) return 0; return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0; } static u64 cpuid_get_supported_xss(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry_index(vcpu, 0xd, 1); if (!best) return 0; return (best->ecx | ((u64)best->edx << 32)) & kvm_caps.supported_xss; } static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entry, unsigned int x86_feature, bool has_feature) { cpuid_entry_change(entry, x86_feature, has_feature); guest_cpu_cap_change(vcpu, x86_feature, has_feature); } static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; vcpu->arch.cpuid_dynamic_bits_dirty = false; best = kvm_find_cpuid_entry(vcpu, 1); if (best) { kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSXSAVE, kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)); kvm_update_feature_runtime(vcpu, best, X86_FEATURE_APIC, vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE); if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) kvm_update_feature_runtime(vcpu, best, X86_FEATURE_MWAIT, vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT); } best = kvm_find_cpuid_entry_index(vcpu, 7, 0); if (best) kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSPKE, kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)); best = kvm_find_cpuid_entry_index(vcpu, 0xD, 0); if (best) best->ebx = xstate_required_size(vcpu->arch.xcr0, false); best = kvm_find_cpuid_entry_index(vcpu, 0xD, 1); if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) || cpuid_entry_has(best, X86_FEATURE_XSAVEC))) best->ebx = xstate_required_size(vcpu->arch.xcr0 | vcpu->arch.ia32_xss, true); } static bool kvm_cpuid_has_hyperv(struct kvm_vcpu *vcpu) { #ifdef CONFIG_KVM_HYPERV struct kvm_cpuid_entry2 *entry; entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE); return entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX; #else return false; #endif } static bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *entry; entry = kvm_find_cpuid_entry(vcpu, 0); if (!entry) return false; return is_guest_vendor_amd(entry->ebx, entry->ecx, entry->edx) || is_guest_vendor_hygon(entry->ebx, entry->ecx, entry->edx); } /* * This isn't truly "unsafe", but except for the cpu_caps initialization code, * all register lookups should use __cpuid_entry_get_reg(), which provides * compile-time validation of the input. */ static u32 cpuid_get_reg_unsafe(struct kvm_cpuid_entry2 *entry, u32 reg) { switch (reg) { case CPUID_EAX: return entry->eax; case CPUID_EBX: return entry->ebx; case CPUID_ECX: return entry->ecx; case CPUID_EDX: return entry->edx; default: WARN_ON_ONCE(1); return 0; } } static int cpuid_func_emulated(struct kvm_cpuid_entry2 *entry, u32 func, bool include_partially_emulated); void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; struct kvm_cpuid_entry2 *best; struct kvm_cpuid_entry2 *entry; bool allow_gbpages; int i; memset(vcpu->arch.cpu_caps, 0, sizeof(vcpu->arch.cpu_caps)); BUILD_BUG_ON(ARRAY_SIZE(reverse_cpuid) != NR_KVM_CPU_CAPS); /* * Reset guest capabilities to userspace's guest CPUID definition, i.e. * honor userspace's definition for features that don't require KVM or * hardware management/support (or that KVM simply doesn't care about). */ for (i = 0; i < NR_KVM_CPU_CAPS; i++) { const struct cpuid_reg cpuid = reverse_cpuid[i]; struct kvm_cpuid_entry2 emulated; if (!cpuid.function) continue; entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index); if (!entry) continue; cpuid_func_emulated(&emulated, cpuid.function, true); /* * A vCPU has a feature if it's supported by KVM and is enabled * in guest CPUID. Note, this includes features that are * supported by KVM but aren't advertised to userspace! */ vcpu->arch.cpu_caps[i] = kvm_cpu_caps[i] | cpuid_get_reg_unsafe(&emulated, cpuid.reg); vcpu->arch.cpu_caps[i] &= cpuid_get_reg_unsafe(entry, cpuid.reg); } kvm_update_cpuid_runtime(vcpu); /* * If TDP is enabled, let the guest use GBPAGES if they're supported in * hardware. The hardware page walker doesn't let KVM disable GBPAGES, * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA * walk for performance and complexity reasons. Not to mention KVM * _can't_ solve the problem because GVA->GPA walks aren't visible to * KVM once a TDP translation is installed. Mimic hardware behavior so * that KVM's is at least consistent, i.e. doesn't randomly inject #PF. * If TDP is disabled, honor *only* guest CPUID as KVM has full control * and can install smaller shadow pages if the host lacks 1GiB support. */ allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) : guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES); guest_cpu_cap_change(vcpu, X86_FEATURE_GBPAGES, allow_gbpages); best = kvm_find_cpuid_entry(vcpu, 1); if (best && apic) { if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER)) apic->lapic_timer.timer_mode_mask = 3 << 17; else apic->lapic_timer.timer_mode_mask = 1 << 17; kvm_apic_set_version(vcpu); } vcpu->arch.guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu); vcpu->arch.guest_supported_xss = cpuid_get_supported_xss(vcpu); vcpu->arch.pv_cpuid.features = kvm_apply_cpuid_pv_features_quirk(vcpu); vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); kvm_pmu_refresh(vcpu); #define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f) vcpu->arch.cr4_guest_rsvd_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_) | __cr4_reserved_bits(guest_cpu_cap_has, vcpu); #undef __kvm_cpu_cap_has kvm_hv_set_cpuid(vcpu, kvm_cpuid_has_hyperv(vcpu)); /* Invoke the vendor callback only after the above state is updated. */ kvm_x86_call(vcpu_after_set_cpuid)(vcpu); /* * Except for the MMU, which needs to do its thing any vendor specific * adjustments to the reserved GPA bits. */ kvm_mmu_after_set_cpuid(vcpu); kvm_make_request(KVM_REQ_RECALC_INTERCEPTS, vcpu); } int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry(vcpu, 0x80000000); if (!best || best->eax < 0x80000008) goto not_found; best = kvm_find_cpuid_entry(vcpu, 0x80000008); if (best) return best->eax & 0xff; not_found: return 36; } int cpuid_query_maxguestphyaddr(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry(vcpu, 0x80000000); if (!best || best->eax < 0x80000008) goto not_found; best = kvm_find_cpuid_entry(vcpu, 0x80000008); if (best) return (best->eax >> 16) & 0xff; not_found: return 0; } /* * This "raw" version returns the reserved GPA bits without any adjustments for * encryption technologies that usurp bits. The raw mask should be used if and * only if hardware does _not_ strip the usurped bits, e.g. in virtual MTRRs. */ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu) { return rsvd_bits(cpuid_maxphyaddr(vcpu), 63); } static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, int nent) { u32 vcpu_caps[NR_KVM_CPU_CAPS]; int r; /* * Swap the existing (old) entries with the incoming (new) entries in * order to massage the new entries, e.g. to account for dynamic bits * that KVM controls, without clobbering the current guest CPUID, which * KVM needs to preserve in order to unwind on failure. * * Similarly, save the vCPU's current cpu_caps so that the capabilities * can be updated alongside the CPUID entries when performing runtime * updates. Full initialization is done if and only if the vCPU hasn't * run, i.e. only if userspace is potentially changing CPUID features. */ swap(vcpu->arch.cpuid_entries, e2); swap(vcpu->arch.cpuid_nent, nent); memcpy(vcpu_caps, vcpu->arch.cpu_caps, sizeof(vcpu_caps)); BUILD_BUG_ON(sizeof(vcpu_caps) != sizeof(vcpu->arch.cpu_caps)); /* * KVM does not correctly handle changing guest CPUID after KVM_RUN, as * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't * tracked in kvm_mmu_page_role. As a result, KVM may miss guest page * faults due to reusing SPs/SPTEs. In practice no sane VMM mucks with * the core vCPU model on the fly. It would've been better to forbid any * KVM_SET_CPUID{,2} calls after KVM_RUN altogether but unfortunately * some VMMs (e.g. QEMU) reuse vCPU fds for CPU hotplug/unplug and do * KVM_SET_CPUID{,2} again. To support this legacy behavior, check * whether the supplied CPUID data is equal to what's already set. */ if (kvm_vcpu_has_run(vcpu)) { r = kvm_cpuid_check_equal(vcpu, e2, nent); if (r) goto err; goto success; } #ifdef CONFIG_KVM_HYPERV if (kvm_cpuid_has_hyperv(vcpu)) { r = kvm_hv_vcpu_init(vcpu); if (r) goto err; } #endif r = kvm_check_cpuid(vcpu); if (r) goto err; #ifdef CONFIG_KVM_XEN vcpu->arch.xen.cpuid = kvm_get_hypervisor_cpuid(vcpu, XEN_SIGNATURE); #endif kvm_vcpu_after_set_cpuid(vcpu); success: kvfree(e2); return 0; err: memcpy(vcpu->arch.cpu_caps, vcpu_caps, sizeof(vcpu_caps)); swap(vcpu->arch.cpuid_entries, e2); swap(vcpu->arch.cpuid_nent, nent); return r; } /* when an old userspace process fills a new kernel module */ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) { int r, i; struct kvm_cpuid_entry *e = NULL; struct kvm_cpuid_entry2 *e2 = NULL; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) return -E2BIG; if (cpuid->nent) { e = vmemdup_array_user(entries, cpuid->nent, sizeof(*e)); if (IS_ERR(e)) return PTR_ERR(e); e2 = kvmalloc_array(cpuid->nent, sizeof(*e2), GFP_KERNEL_ACCOUNT); if (!e2) { r = -ENOMEM; goto out_free_cpuid; } } for (i = 0; i < cpuid->nent; i++) { e2[i].function = e[i].function; e2[i].eax = e[i].eax; e2[i].ebx = e[i].ebx; e2[i].ecx = e[i].ecx; e2[i].edx = e[i].edx; e2[i].index = 0; e2[i].flags = 0; e2[i].padding[0] = 0; e2[i].padding[1] = 0; e2[i].padding[2] = 0; } r = kvm_set_cpuid(vcpu, e2, cpuid->nent); if (r) kvfree(e2); out_free_cpuid: kvfree(e); return r; } int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries) { struct kvm_cpuid_entry2 *e2 = NULL; int r; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) return -E2BIG; if (cpuid->nent) { e2 = vmemdup_array_user(entries, cpuid->nent, sizeof(*e2)); if (IS_ERR(e2)) return PTR_ERR(e2); } r = kvm_set_cpuid(vcpu, e2, cpuid->nent); if (r) kvfree(e2); return r; } int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries) { if (cpuid->nent < vcpu->arch.cpuid_nent) return -E2BIG; if (vcpu->arch.cpuid_dynamic_bits_dirty) kvm_update_cpuid_runtime(vcpu); if (copy_to_user(entries, vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) return -EFAULT; cpuid->nent = vcpu->arch.cpuid_nent; return 0; } static __always_inline u32 raw_cpuid_get(struct cpuid_reg cpuid) { struct kvm_cpuid_entry2 entry; u32 base; /* * KVM only supports features defined by Intel (0x0), AMD (0x80000000), * and Centaur (0xc0000000). WARN if a feature for new vendor base is * defined, as this and other code would need to be updated. */ base = cpuid.function & 0xffff0000; if (WARN_ON_ONCE(base && base != 0x80000000 && base != 0xc0000000)) return 0; if (cpuid_eax(base) < cpuid.function) return 0; cpuid_count(cpuid.function, cpuid.index, &entry.eax, &entry.ebx, &entry.ecx, &entry.edx); return *__cpuid_entry_get_reg(&entry, cpuid.reg); } /* * For kernel-defined leafs, mask KVM's supported feature set with the kernel's * capabilities as well as raw CPUID. For KVM-defined leafs, consult only raw * CPUID, as KVM is the one and only authority (in the kernel). */ #define kvm_cpu_cap_init(leaf, feature_initializers...) \ do { \ const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32); \ const u32 __maybe_unused kvm_cpu_cap_init_in_progress = leaf; \ const u32 *kernel_cpu_caps = boot_cpu_data.x86_capability; \ u32 kvm_cpu_cap_passthrough = 0; \ u32 kvm_cpu_cap_synthesized = 0; \ u32 kvm_cpu_cap_emulated = 0; \ u32 kvm_cpu_cap_features = 0; \ \ feature_initializers \ \ kvm_cpu_caps[leaf] = kvm_cpu_cap_features; \ \ if (leaf < NCAPINTS) \ kvm_cpu_caps[leaf] &= kernel_cpu_caps[leaf]; \ \ kvm_cpu_caps[leaf] |= kvm_cpu_cap_passthrough; \ kvm_cpu_caps[leaf] &= (raw_cpuid_get(cpuid) | \ kvm_cpu_cap_synthesized); \ kvm_cpu_caps[leaf] |= kvm_cpu_cap_emulated; \ } while (0) /* * Assert that the feature bit being declared, e.g. via F(), is in the CPUID * word that's being initialized. Exempt 0x8000_0001.EDX usage of 0x1.EDX * features, as AMD duplicated many 0x1.EDX features into 0x8000_0001.EDX. */ #define KVM_VALIDATE_CPU_CAP_USAGE(name) \ do { \ u32 __leaf = __feature_leaf(X86_FEATURE_##name); \ \ BUILD_BUG_ON(__leaf != kvm_cpu_cap_init_in_progress); \ } while (0) #define F(name) \ ({ \ KVM_VALIDATE_CPU_CAP_USAGE(name); \ kvm_cpu_cap_features |= feature_bit(name); \ }) /* Scattered Flag - For features that are scattered by cpufeatures.h. */ #define SCATTERED_F(name) \ ({ \ BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES); \ KVM_VALIDATE_CPU_CAP_USAGE(name); \ if (boot_cpu_has(X86_FEATURE_##name)) \ F(name); \ }) /* Features that KVM supports only on 64-bit kernels. */ #define X86_64_F(name) \ ({ \ KVM_VALIDATE_CPU_CAP_USAGE(name); \ if (IS_ENABLED(CONFIG_X86_64)) \ F(name); \ }) /* * Emulated Feature - For features that KVM emulates in software irrespective * of host CPU/kernel support. */ #define EMULATED_F(name) \ ({ \ kvm_cpu_cap_emulated |= feature_bit(name); \ F(name); \ }) /* * Synthesized Feature - For features that are synthesized into boot_cpu_data, * i.e. may not be present in the raw CPUID, but can still be advertised to * userspace. Primarily used for mitigation related feature flags. */ #define SYNTHESIZED_F(name) \ ({ \ kvm_cpu_cap_synthesized |= feature_bit(name); \ F(name); \ }) /* * Passthrough Feature - For features that KVM supports based purely on raw * hardware CPUID, i.e. that KVM virtualizes even if the host kernel doesn't * use the feature. Simply force set the feature in KVM's capabilities, raw * CPUID support will be factored in by kvm_cpu_cap_mask(). */ #define PASSTHROUGH_F(name) \ ({ \ kvm_cpu_cap_passthrough |= feature_bit(name); \ F(name); \ }) /* * Aliased Features - For features in 0x8000_0001.EDX that are duplicates of * identical 0x1.EDX features, and thus are aliased from 0x1 to 0x8000_0001. */ #define ALIASED_1_EDX_F(name) \ ({ \ BUILD_BUG_ON(__feature_leaf(X86_FEATURE_##name) != CPUID_1_EDX); \ BUILD_BUG_ON(kvm_cpu_cap_init_in_progress != CPUID_8000_0001_EDX); \ kvm_cpu_cap_features |= feature_bit(name); \ }) /* * Vendor Features - For features that KVM supports, but are added in later * because they require additional vendor enabling. */ #define VENDOR_F(name) \ ({ \ KVM_VALIDATE_CPU_CAP_USAGE(name); \ }) /* * Runtime Features - For features that KVM dynamically sets/clears at runtime, * e.g. when CR4 changes, but which are never advertised to userspace. */ #define RUNTIME_F(name) \ ({ \ KVM_VALIDATE_CPU_CAP_USAGE(name); \ }) /* * Undefine the MSR bit macro to avoid token concatenation issues when * processing X86_FEATURE_SPEC_CTRL_SSBD. */ #undef SPEC_CTRL_SSBD /* DS is defined by ptrace-abi.h on 32-bit builds. */ #undef DS void kvm_set_cpu_caps(void) { memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps)); BUILD_BUG_ON(sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps)) > sizeof(boot_cpu_data.x86_capability)); kvm_cpu_cap_init(CPUID_1_ECX, F(XMM3), F(PCLMULQDQ), VENDOR_F(DTES64), /* * NOTE: MONITOR (and MWAIT) are emulated as NOP, but *not* * advertised to guests via CPUID! MWAIT is also technically a * runtime flag thanks to IA32_MISC_ENABLES; mark it as such so * that KVM is aware that it's a known, unadvertised flag. */ RUNTIME_F(MWAIT), /* DS-CPL */ VENDOR_F(VMX), /* SMX, EST */ /* TM2 */ F(SSSE3), /* CNXT-ID */ /* Reserved */ F(FMA), F(CX16), /* xTPR Update */ F(PDCM), F(PCID), /* Reserved, DCA */ F(XMM4_1), F(XMM4_2), EMULATED_F(X2APIC), F(MOVBE), F(POPCNT), EMULATED_F(TSC_DEADLINE_TIMER), F(AES), F(XSAVE), RUNTIME_F(OSXSAVE), F(AVX), F(F16C), F(RDRAND), EMULATED_F(HYPERVISOR), ); kvm_cpu_cap_init(CPUID_1_EDX, F(FPU), F(VME), F(DE), F(PSE), F(TSC), F(MSR), F(PAE), F(MCE), F(CX8), F(APIC), /* Reserved */ F(SEP), F(MTRR), F(PGE), F(MCA), F(CMOV), F(PAT), F(PSE36), /* PSN */ F(CLFLUSH), /* Reserved */ VENDOR_F(DS), /* ACPI */ F(MMX), F(FXSR), F(XMM), F(XMM2), F(SELFSNOOP), /* HTT, TM, Reserved, PBE */ ); kvm_cpu_cap_init(CPUID_7_0_EBX, F(FSGSBASE), EMULATED_F(TSC_ADJUST), F(SGX), F(BMI1), F(HLE), F(AVX2), F(FDP_EXCPTN_ONLY), F(SMEP), F(BMI2), F(ERMS), F(INVPCID), F(RTM), F(ZERO_FCS_FDS), VENDOR_F(MPX), F(AVX512F), F(AVX512DQ), F(RDSEED), F(ADX), F(SMAP), F(AVX512IFMA), F(CLFLUSHOPT), F(CLWB), VENDOR_F(INTEL_PT), F(AVX512PF), F(AVX512ER), F(AVX512CD), F(SHA_NI), F(AVX512BW), F(AVX512VL), ); kvm_cpu_cap_init(CPUID_7_ECX, F(AVX512VBMI), PASSTHROUGH_F(LA57), F(PKU), RUNTIME_F(OSPKE), F(RDPID), F(AVX512_VPOPCNTDQ), F(UMIP), F(AVX512_VBMI2), F(GFNI), F(VAES), F(VPCLMULQDQ), F(AVX512_VNNI), F(AVX512_BITALG), F(CLDEMOTE), F(MOVDIRI), F(MOVDIR64B), VENDOR_F(WAITPKG), F(SGX_LC), F(BUS_LOCK_DETECT), X86_64_F(SHSTK), ); /* * PKU not yet implemented for shadow paging and requires OSPKE * to be set on the host. Clear it if that is not the case */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) kvm_cpu_cap_clear(X86_FEATURE_PKU); /* * Shadow Stacks aren't implemented in the Shadow MMU. Shadow Stack * accesses require "magic" Writable=0,Dirty=1 protection, which KVM * doesn't know how to emulate or map. */ if (!tdp_enabled) kvm_cpu_cap_clear(X86_FEATURE_SHSTK); kvm_cpu_cap_init(CPUID_7_EDX, F(AVX512_4VNNIW), F(AVX512_4FMAPS), F(SPEC_CTRL), F(SPEC_CTRL_SSBD), EMULATED_F(ARCH_CAPABILITIES), F(INTEL_STIBP), F(MD_CLEAR), F(AVX512_VP2INTERSECT), F(FSRM), F(SERIALIZE), F(TSXLDTRK), F(AVX512_FP16), F(AMX_TILE), F(AMX_INT8), F(AMX_BF16), F(FLUSH_L1D), F(IBT), ); /* * Disable support for IBT and SHSTK if KVM is configured to emulate * accesses to reserved GPAs, as KVM's emulator doesn't support IBT or * SHSTK, nor does KVM handle Shadow Stack #PFs (see above). */ if (allow_smaller_maxphyaddr) { kvm_cpu_cap_clear(X86_FEATURE_SHSTK); kvm_cpu_cap_clear(X86_FEATURE_IBT); } if (boot_cpu_has(X86_FEATURE_AMD_IBPB_RET) && boot_cpu_has(X86_FEATURE_AMD_IBPB) && boot_cpu_has(X86_FEATURE_AMD_IBRS)) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL); if (boot_cpu_has(X86_FEATURE_STIBP)) kvm_cpu_cap_set(X86_FEATURE_INTEL_STIBP); if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD); kvm_cpu_cap_init(CPUID_7_1_EAX, F(SHA512), F(SM3), F(SM4), F(AVX_VNNI), F(AVX512_BF16), F(CMPCCXADD), F(FZRM), F(FSRS), F(FSRC), F(WRMSRNS), X86_64_F(LKGS), F(AMX_FP16), F(AVX_IFMA), F(LAM), ); kvm_cpu_cap_init(CPUID_7_1_ECX, SCATTERED_F(MSR_IMM), ); kvm_cpu_cap_init(CPUID_7_1_EDX, F(AVX_VNNI_INT8), F(AVX_NE_CONVERT), F(AMX_COMPLEX), F(AVX_VNNI_INT16), F(PREFETCHITI), F(AVX10), ); kvm_cpu_cap_init(CPUID_7_2_EDX, F(INTEL_PSFD), F(IPRED_CTRL), F(RRSBA_CTRL), F(DDPD_U), F(BHI_CTRL), F(MCDT_NO), ); kvm_cpu_cap_init(CPUID_D_1_EAX, F(XSAVEOPT), F(XSAVEC), F(XGETBV1), F(XSAVES), X86_64_F(XFD), ); kvm_cpu_cap_init(CPUID_12_EAX, SCATTERED_F(SGX1), SCATTERED_F(SGX2), SCATTERED_F(SGX_EDECCSSA), ); kvm_cpu_cap_init(CPUID_24_0_EBX, F(AVX10_128), F(AVX10_256), F(AVX10_512), ); kvm_cpu_cap_init(CPUID_8000_0001_ECX, F(LAHF_LM), F(CMP_LEGACY), VENDOR_F(SVM), /* ExtApicSpace */ F(CR8_LEGACY), F(ABM), F(SSE4A), F(MISALIGNSSE), F(3DNOWPREFETCH), F(OSVW), /* IBS */ F(XOP), /* SKINIT, WDT, LWP */ F(FMA4), F(TBM), F(TOPOEXT), VENDOR_F(PERFCTR_CORE), ); kvm_cpu_cap_init(CPUID_8000_0001_EDX, ALIASED_1_EDX_F(FPU), ALIASED_1_EDX_F(VME), ALIASED_1_EDX_F(DE), ALIASED_1_EDX_F(PSE), ALIASED_1_EDX_F(TSC), ALIASED_1_EDX_F(MSR), ALIASED_1_EDX_F(PAE), ALIASED_1_EDX_F(MCE), ALIASED_1_EDX_F(CX8), ALIASED_1_EDX_F(APIC), /* Reserved */ F(SYSCALL), ALIASED_1_EDX_F(MTRR), ALIASED_1_EDX_F(PGE), ALIASED_1_EDX_F(MCA), ALIASED_1_EDX_F(CMOV), ALIASED_1_EDX_F(PAT), ALIASED_1_EDX_F(PSE36), /* Reserved */ F(NX), /* Reserved */ F(MMXEXT), ALIASED_1_EDX_F(MMX), ALIASED_1_EDX_F(FXSR), F(FXSR_OPT), X86_64_F(GBPAGES), F(RDTSCP), /* Reserved */ X86_64_F(LM), F(3DNOWEXT), F(3DNOW), ); if (!tdp_enabled && IS_ENABLED(CONFIG_X86_64)) kvm_cpu_cap_set(X86_FEATURE_GBPAGES); kvm_cpu_cap_init(CPUID_8000_0007_EDX, SCATTERED_F(CONSTANT_TSC), ); kvm_cpu_cap_init(CPUID_8000_0008_EBX, F(CLZERO), F(XSAVEERPTR), F(WBNOINVD), F(AMD_IBPB), F(AMD_IBRS), F(AMD_SSBD), F(VIRT_SSBD), F(AMD_SSB_NO), F(AMD_STIBP), F(AMD_STIBP_ALWAYS_ON), F(AMD_IBRS_SAME_MODE), F(AMD_PSFD), F(AMD_IBPB_RET), ); /* * AMD has separate bits for each SPEC_CTRL bit. * arch/x86/kernel/cpu/bugs.c is kind enough to * record that in cpufeatures so use them. */ if (boot_cpu_has(X86_FEATURE_IBPB)) { kvm_cpu_cap_set(X86_FEATURE_AMD_IBPB); if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) && !boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) kvm_cpu_cap_set(X86_FEATURE_AMD_IBPB_RET); } if (boot_cpu_has(X86_FEATURE_IBRS)) kvm_cpu_cap_set(X86_FEATURE_AMD_IBRS); if (boot_cpu_has(X86_FEATURE_STIBP)) kvm_cpu_cap_set(X86_FEATURE_AMD_STIBP); if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) kvm_cpu_cap_set(X86_FEATURE_AMD_SSBD); if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) kvm_cpu_cap_set(X86_FEATURE_AMD_SSB_NO); /* * The preference is to use SPEC CTRL MSR instead of the * VIRT_SPEC MSR. */ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) && !boot_cpu_has(X86_FEATURE_AMD_SSBD)) kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD); /* All SVM features required additional vendor module enabling. */ kvm_cpu_cap_init(CPUID_8000_000A_EDX, VENDOR_F(NPT), VENDOR_F(VMCBCLEAN), VENDOR_F(FLUSHBYASID), VENDOR_F(NRIPS), VENDOR_F(TSCRATEMSR), VENDOR_F(V_VMSAVE_VMLOAD), VENDOR_F(LBRV), VENDOR_F(PAUSEFILTER), VENDOR_F(PFTHRESHOLD), VENDOR_F(VGIF), VENDOR_F(VNMI), VENDOR_F(SVME_ADDR_CHK), ); kvm_cpu_cap_init(CPUID_8000_001F_EAX, VENDOR_F(SME), VENDOR_F(SEV), /* VM_PAGE_FLUSH */ VENDOR_F(SEV_ES), F(SME_COHERENT), ); kvm_cpu_cap_init(CPUID_8000_0021_EAX, F(NO_NESTED_DATA_BP), F(WRMSR_XX_BASE_NS), /* * Synthesize "LFENCE is serializing" into the AMD-defined entry * in KVM's supported CPUID, i.e. if the feature is reported as * supported by the kernel. LFENCE_RDTSC was a Linux-defined * synthetic feature long before AMD joined the bandwagon, e.g. * LFENCE is serializing on most CPUs that support SSE2. On * CPUs that don't support AMD's leaf, ANDing with the raw host * CPUID will drop the flags, and reporting support in AMD's * leaf can make it easier for userspace to detect the feature. */ SYNTHESIZED_F(LFENCE_RDTSC), /* SmmPgCfgLock */ /* 4: Resv */ SYNTHESIZED_F(VERW_CLEAR), F(NULL_SEL_CLR_BASE), /* UpperAddressIgnore */ F(AUTOIBRS), F(PREFETCHI), EMULATED_F(NO_SMM_CTL_MSR), /* PrefetchCtlMsr */ /* GpOnUserCpuid */ /* EPSF */ SYNTHESIZED_F(SBPB), SYNTHESIZED_F(IBPB_BRTYPE), SYNTHESIZED_F(SRSO_NO), F(SRSO_USER_KERNEL_NO), ); kvm_cpu_cap_init(CPUID_8000_0021_ECX, SYNTHESIZED_F(TSA_SQ_NO), SYNTHESIZED_F(TSA_L1_NO), ); kvm_cpu_cap_init(CPUID_8000_0022_EAX, F(PERFMON_V2), ); if (!static_cpu_has_bug(X86_BUG_NULL_SEG)) kvm_cpu_cap_set(X86_FEATURE_NULL_SEL_CLR_BASE); kvm_cpu_cap_init(CPUID_C000_0001_EDX, F(XSTORE), F(XSTORE_EN), F(XCRYPT), F(XCRYPT_EN), F(ACE2), F(ACE2_EN), F(PHE), F(PHE_EN), F(PMM), F(PMM_EN), ); /* * Hide RDTSCP and RDPID if either feature is reported as supported but * probing MSR_TSC_AUX failed. This is purely a sanity check and * should never happen, but the guest will likely crash if RDTSCP or * RDPID is misreported, and KVM has botched MSR_TSC_AUX emulation in * the past. For example, the sanity check may fire if this instance of * KVM is running as L1 on top of an older, broken KVM. */ if (WARN_ON((kvm_cpu_cap_has(X86_FEATURE_RDTSCP) || kvm_cpu_cap_has(X86_FEATURE_RDPID)) && !kvm_is_supported_user_return_msr(MSR_TSC_AUX))) { kvm_cpu_cap_clear(X86_FEATURE_RDTSCP); kvm_cpu_cap_clear(X86_FEATURE_RDPID); } } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cpu_caps); #undef F #undef SCATTERED_F #undef X86_64_F #undef EMULATED_F #undef SYNTHESIZED_F #undef PASSTHROUGH_F #undef ALIASED_1_EDX_F #undef VENDOR_F #undef RUNTIME_F struct kvm_cpuid_array { struct kvm_cpuid_entry2 *entries; int maxnent; int nent; }; static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array) { if (array->nent >= array->maxnent) return NULL; return &array->entries[array->nent++]; } static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, u32 function, u32 index) { struct kvm_cpuid_entry2 *entry = get_next_cpuid(array); if (!entry) return NULL; memset(entry, 0, sizeof(*entry)); entry->function = function; entry->index = index; switch (function & 0xC0000000) { case 0x40000000: /* Hypervisor leaves are always synthesized by __do_cpuid_func. */ return entry; case 0x80000000: /* * 0x80000021 is sometimes synthesized by __do_cpuid_func, which * would result in out-of-bounds calls to do_host_cpuid. */ { static int max_cpuid_80000000; if (!READ_ONCE(max_cpuid_80000000)) WRITE_ONCE(max_cpuid_80000000, cpuid_eax(0x80000000)); if (function > READ_ONCE(max_cpuid_80000000)) return entry; } break; default: break; } cpuid_count(entry->function, entry->index, &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); if (cpuid_function_is_indexed(function)) entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; return entry; } static int cpuid_func_emulated(struct kvm_cpuid_entry2 *entry, u32 func, bool include_partially_emulated) { memset(entry, 0, sizeof(*entry)); entry->function = func; entry->index = 0; entry->flags = 0; switch (func) { case 0: entry->eax = 7; return 1; case 1: entry->ecx = feature_bit(MOVBE); /* * KVM allows userspace to enumerate MONITOR+MWAIT support to * the guest, but the MWAIT feature flag is never advertised * to userspace because MONITOR+MWAIT aren't virtualized by * hardware, can't be faithfully emulated in software (KVM * emulates them as NOPs), and allowing the guest to execute * them natively requires enabling a per-VM capability. */ if (include_partially_emulated) entry->ecx |= feature_bit(MWAIT); return 1; case 7: entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; entry->eax = 0; if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) entry->ecx = feature_bit(RDPID); return 1; default: return 0; } } static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func) { if (array->nent >= array->maxnent) return -E2BIG; array->nent += cpuid_func_emulated(&array->entries[array->nent], func, false); return 0; } static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) { struct kvm_cpuid_entry2 *entry; int r, i, max_idx; /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); r = -E2BIG; entry = do_host_cpuid(array, function, 0); if (!entry) goto out; switch (function) { case 0: /* Limited to the highest leaf implemented in KVM. */ entry->eax = min(entry->eax, 0x24U); break; case 1: cpuid_entry_override(entry, CPUID_1_EDX); cpuid_entry_override(entry, CPUID_1_ECX); break; case 2: /* * On ancient CPUs, function 2 entries are STATEFUL. That is, * CPUID(function=2, index=0) may return different results each * time, with the least-significant byte in EAX enumerating the * number of times software should do CPUID(2, 0). * * Modern CPUs, i.e. every CPU KVM has *ever* run on are less * idiotic. Intel's SDM states that EAX & 0xff "will always * return 01H. Software should ignore this value and not * interpret it as an informational descriptor", while AMD's * APM states that CPUID(2) is reserved. * * WARN if a frankenstein CPU that supports virtualization and * a stateful CPUID.0x2 is encountered. */ WARN_ON_ONCE((entry->eax & 0xff) > 1); break; /* functions 4 and 0x8000001d have additional index. */ case 4: case 0x8000001d: /* * Read entries until the cache type in the previous entry is * zero, i.e. indicates an invalid entry. */ for (i = 1; entry->eax & 0x1f; ++i) { entry = do_host_cpuid(array, function, i); if (!entry) goto out; } break; case 6: /* Thermal management */ entry->eax = 0x4; /* allow ARAT */ entry->ebx = 0; entry->ecx = 0; entry->edx = 0; break; /* function 7 has additional index. */ case 7: max_idx = entry->eax = min(entry->eax, 2u); cpuid_entry_override(entry, CPUID_7_0_EBX); cpuid_entry_override(entry, CPUID_7_ECX); cpuid_entry_override(entry, CPUID_7_EDX); /* KVM only supports up to 0x7.2, capped above via min(). */ if (max_idx >= 1) { entry = do_host_cpuid(array, function, 1); if (!entry) goto out; cpuid_entry_override(entry, CPUID_7_1_EAX); cpuid_entry_override(entry, CPUID_7_1_ECX); cpuid_entry_override(entry, CPUID_7_1_EDX); entry->ebx = 0; } if (max_idx >= 2) { entry = do_host_cpuid(array, function, 2); if (!entry) goto out; cpuid_entry_override(entry, CPUID_7_2_EDX); entry->ecx = 0; entry->ebx = 0; entry->eax = 0; } break; case 0xa: { /* Architectural Performance Monitoring */ union cpuid10_eax eax = { }; union cpuid10_edx edx = { }; if (!enable_pmu || !static_cpu_has(X86_FEATURE_ARCH_PERFMON)) { entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; } eax.split.version_id = kvm_pmu_cap.version; eax.split.num_counters = kvm_pmu_cap.num_counters_gp; eax.split.bit_width = kvm_pmu_cap.bit_width_gp; eax.split.mask_length = kvm_pmu_cap.events_mask_len; edx.split.num_counters_fixed = kvm_pmu_cap.num_counters_fixed; edx.split.bit_width_fixed = kvm_pmu_cap.bit_width_fixed; if (kvm_pmu_cap.version) edx.split.anythread_deprecated = 1; entry->eax = eax.full; entry->ebx = kvm_pmu_cap.events_mask; entry->ecx = 0; entry->edx = edx.full; break; } case 0x1f: case 0xb: /* * No topology; a valid topology is indicated by the presence * of subleaf 1. */ entry->eax = entry->ebx = entry->ecx = 0; break; case 0xd: { u64 permitted_xcr0 = kvm_get_filtered_xcr0(); u64 permitted_xss = kvm_caps.supported_xss; entry->eax &= permitted_xcr0; entry->ebx = xstate_required_size(permitted_xcr0, false); entry->ecx = entry->ebx; entry->edx &= permitted_xcr0 >> 32; if (!permitted_xcr0) break; entry = do_host_cpuid(array, function, 1); if (!entry) goto out; cpuid_entry_override(entry, CPUID_D_1_EAX); if (entry->eax & (feature_bit(XSAVES) | feature_bit(XSAVEC))) entry->ebx = xstate_required_size(permitted_xcr0 | permitted_xss, true); else { WARN_ON_ONCE(permitted_xss != 0); entry->ebx = 0; } entry->ecx &= permitted_xss; entry->edx &= permitted_xss >> 32; for (i = 2; i < 64; ++i) { bool s_state; if (permitted_xcr0 & BIT_ULL(i)) s_state = false; else if (permitted_xss & BIT_ULL(i)) s_state = true; else continue; entry = do_host_cpuid(array, function, i); if (!entry) goto out; /* * The supported check above should have filtered out * invalid sub-leafs. Only valid sub-leafs should * reach this point, and they should have a non-zero * save state size. Furthermore, check whether the * processor agrees with permitted_xcr0/permitted_xss * on whether this is an XCR0- or IA32_XSS-managed area. */ if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 0x1) != s_state)) { --array->nent; continue; } if (!kvm_cpu_cap_has(X86_FEATURE_XFD)) entry->ecx &= ~BIT_ULL(2); entry->edx = 0; } break; } case 0x12: /* Intel SGX */ if (!kvm_cpu_cap_has(X86_FEATURE_SGX)) { entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; } /* * Index 0: Sub-features, MISCSELECT (a.k.a extended features) * and max enclave sizes. The SGX sub-features and MISCSELECT * are restricted by kernel and KVM capabilities (like most * feature flags), while enclave size is unrestricted. */ cpuid_entry_override(entry, CPUID_12_EAX); entry->ebx &= SGX_MISC_EXINFO; entry = do_host_cpuid(array, function, 1); if (!entry) goto out; /* * Index 1: SECS.ATTRIBUTES. ATTRIBUTES are restricted a la * feature flags. Advertise all supported flags, including * privileged attributes that require explicit opt-in from * userspace. ATTRIBUTES.XFRM is not adjusted as userspace is * expected to derive it from supported XCR0. */ entry->eax &= SGX_ATTR_PRIV_MASK | SGX_ATTR_UNPRIV_MASK; entry->ebx &= 0; break; /* Intel PT */ case 0x14: if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT)) { entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; } for (i = 1, max_idx = entry->eax; i <= max_idx; ++i) { if (!do_host_cpuid(array, function, i)) goto out; } break; /* Intel AMX TILE */ case 0x1d: if (!kvm_cpu_cap_has(X86_FEATURE_AMX_TILE)) { entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; } for (i = 1, max_idx = entry->eax; i <= max_idx; ++i) { if (!do_host_cpuid(array, function, i)) goto out; } break; case 0x1e: /* TMUL information */ if (!kvm_cpu_cap_has(X86_FEATURE_AMX_TILE)) { entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; } break; case 0x24: { u8 avx10_version; if (!kvm_cpu_cap_has(X86_FEATURE_AVX10)) { entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; } /* * The AVX10 version is encoded in EBX[7:0]. Note, the version * is guaranteed to be >=1 if AVX10 is supported. Note #2, the * version needs to be captured before overriding EBX features! */ avx10_version = min_t(u8, entry->ebx & 0xff, 1); cpuid_entry_override(entry, CPUID_24_0_EBX); entry->ebx |= avx10_version; entry->eax = 0; entry->ecx = 0; entry->edx = 0; break; } case KVM_CPUID_SIGNATURE: { const u32 *sigptr = (const u32 *)KVM_SIGNATURE; entry->eax = KVM_CPUID_FEATURES; entry->ebx = sigptr[0]; entry->ecx = sigptr[1]; entry->edx = sigptr[2]; break; } case KVM_CPUID_FEATURES: entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | (1 << KVM_FEATURE_NOP_IO_DELAY) | (1 << KVM_FEATURE_CLOCKSOURCE2) | (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_PV_EOI) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | (1 << KVM_FEATURE_PV_UNHALT) | (1 << KVM_FEATURE_PV_TLB_FLUSH) | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | (1 << KVM_FEATURE_PV_SEND_IPI) | (1 << KVM_FEATURE_POLL_CONTROL) | (1 << KVM_FEATURE_PV_SCHED_YIELD) | (1 << KVM_FEATURE_ASYNC_PF_INT); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); entry->ebx = 0; entry->ecx = 0; entry->edx = 0; break; case 0x80000000: entry->eax = min(entry->eax, 0x80000022); /* * Serializing LFENCE is reported in a multitude of ways, and * NullSegClearsBase is not reported in CPUID on Zen2; help * userspace by providing the CPUID leaf ourselves. * * However, only do it if the host has CPUID leaf 0x8000001d. * QEMU thinks that it can query the host blindly for that * CPUID leaf if KVM reports that it supports 0x8000001d or * above. The processor merrily returns values from the * highest Intel leaf which QEMU tries to use as the guest's * 0x8000001d. Even worse, this can result in an infinite * loop if said highest leaf has no subleaves indexed by ECX. */ if (entry->eax >= 0x8000001d && (static_cpu_has(X86_FEATURE_LFENCE_RDTSC) || !static_cpu_has_bug(X86_BUG_NULL_SEG))) entry->eax = max(entry->eax, 0x80000021); break; case 0x80000001: entry->ebx &= ~GENMASK(27, 16); cpuid_entry_override(entry, CPUID_8000_0001_EDX); cpuid_entry_override(entry, CPUID_8000_0001_ECX); break; case 0x80000005: /* Pass host L1 cache and TLB info. */ break; case 0x80000006: /* Drop reserved bits, pass host L2 cache and TLB info. */ entry->edx &= ~GENMASK(17, 16); break; case 0x80000007: /* Advanced power management */ cpuid_entry_override(entry, CPUID_8000_0007_EDX); /* mask against host */ entry->edx &= boot_cpu_data.x86_power; entry->eax = entry->ebx = entry->ecx = 0; break; case 0x80000008: { /* * GuestPhysAddrSize (EAX[23:16]) is intended for software * use. * * KVM's ABI is to report the effective MAXPHYADDR for the * guest in PhysAddrSize (phys_as), and the maximum * *addressable* GPA in GuestPhysAddrSize (g_phys_as). * * GuestPhysAddrSize is valid if and only if TDP is enabled, * in which case the max GPA that can be addressed by KVM may * be less than the max GPA that can be legally generated by * the guest, e.g. if MAXPHYADDR>48 but the CPU doesn't * support 5-level TDP. */ unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U); unsigned int phys_as, g_phys_as; /* * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as * the guest operates in the same PA space as the host, i.e. * reductions in MAXPHYADDR for memory encryption affect shadow * paging, too. * * If TDP is enabled, use the raw bare metal MAXPHYADDR as * reductions to the HPAs do not affect GPAs. The max * addressable GPA is the same as the max effective GPA, except * that it's capped at 48 bits if 5-level TDP isn't supported * (hardware processes bits 51:48 only when walking the fifth * level page table). */ if (!tdp_enabled) { phys_as = boot_cpu_data.x86_phys_bits; g_phys_as = 0; } else { phys_as = entry->eax & 0xff; g_phys_as = phys_as; if (kvm_mmu_get_max_tdp_level() < 5) g_phys_as = min(g_phys_as, 48U); } entry->eax = phys_as | (virt_as << 8) | (g_phys_as << 16); entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8)); entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0008_EBX); break; } case 0x8000000A: if (!kvm_cpu_cap_has(X86_FEATURE_SVM)) { entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; } entry->eax = 1; /* SVM revision 1 */ entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper ASID emulation to nested SVM */ entry->ecx = 0; /* Reserved */ cpuid_entry_override(entry, CPUID_8000_000A_EDX); break; case 0x80000019: entry->ecx = entry->edx = 0; break; case 0x8000001a: entry->eax &= GENMASK(2, 0); entry->ebx = entry->ecx = entry->edx = 0; break; case 0x8000001e: /* Do not return host topology information. */ entry->eax = entry->ebx = entry->ecx = 0; entry->edx = 0; /* reserved */ break; case 0x8000001F: if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) { entry->eax = entry->ebx = entry->ecx = entry->edx = 0; } else { cpuid_entry_override(entry, CPUID_8000_001F_EAX); /* Clear NumVMPL since KVM does not support VMPL. */ entry->ebx &= ~GENMASK(31, 12); /* * Enumerate '0' for "PA bits reduction", the adjusted * MAXPHYADDR is enumerated directly (see 0x80000008). */ entry->ebx &= ~GENMASK(11, 6); } break; case 0x80000020: entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; case 0x80000021: entry->ebx = entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0021_EAX); cpuid_entry_override(entry, CPUID_8000_0021_ECX); break; /* AMD Extended Performance Monitoring and Debug */ case 0x80000022: { union cpuid_0x80000022_ebx ebx = { }; entry->ecx = entry->edx = 0; if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) { entry->eax = entry->ebx = 0; break; } cpuid_entry_override(entry, CPUID_8000_0022_EAX); ebx.split.num_core_pmc = kvm_pmu_cap.num_counters_gp; entry->ebx = ebx.full; break; } /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: /*Just support up to 0xC0000004 now*/ entry->eax = min(entry->eax, 0xC0000004); break; case 0xC0000001: cpuid_entry_override(entry, CPUID_C000_0001_EDX); break; case 3: /* Processor serial number */ case 5: /* MONITOR/MWAIT */ case 0xC0000002: case 0xC0000003: case 0xC0000004: default: entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; } r = 0; out: put_cpu(); return r; } static int do_cpuid_func(struct kvm_cpuid_array *array, u32 func, unsigned int type) { if (type == KVM_GET_EMULATED_CPUID) return __do_cpuid_func_emulated(array, func); return __do_cpuid_func(array, func); } #define CENTAUR_CPUID_SIGNATURE 0xC0000000 static int get_cpuid_func(struct kvm_cpuid_array *array, u32 func, unsigned int type) { u32 limit; int r; if (func == CENTAUR_CPUID_SIGNATURE && boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR && boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) return 0; r = do_cpuid_func(array, func, type); if (r) return r; limit = array->entries[array->nent - 1].eax; for (func = func + 1; func <= limit; ++func) { r = do_cpuid_func(array, func, type); if (r) break; } return r; } static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries, __u32 num_entries, unsigned int ioctl_type) { int i; __u32 pad[3]; if (ioctl_type != KVM_GET_EMULATED_CPUID) return false; /* * We want to make sure that ->padding is being passed clean from * userspace in case we want to use it for something in the future. * * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we * have to give ourselves satisfied only with the emulated side. /me * sheds a tear. */ for (i = 0; i < num_entries; i++) { if (copy_from_user(pad, entries[i].padding, sizeof(pad))) return true; if (pad[0] || pad[1] || pad[2]) return true; } return false; } int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries, unsigned int type) { static const u32 funcs[] = { 0, 0x80000000, CENTAUR_CPUID_SIGNATURE, KVM_CPUID_SIGNATURE, }; struct kvm_cpuid_array array = { .nent = 0, }; int r, i; if (cpuid->nent < 1) return -E2BIG; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) cpuid->nent = KVM_MAX_CPUID_ENTRIES; if (sanity_check_entries(entries, cpuid->nent, type)) return -EINVAL; array.entries = kvcalloc(cpuid->nent, sizeof(struct kvm_cpuid_entry2), GFP_KERNEL); if (!array.entries) return -ENOMEM; array.maxnent = cpuid->nent; for (i = 0; i < ARRAY_SIZE(funcs); i++) { r = get_cpuid_func(&array, funcs[i], type); if (r) goto out_free; } cpuid->nent = array.nent; if (copy_to_user(entries, array.entries, array.nent * sizeof(struct kvm_cpuid_entry2))) r = -EFAULT; out_free: kvfree(array.entries); return r; } /* * Intel CPUID semantics treats any query for an out-of-range leaf as if the * highest basic leaf (i.e. CPUID.0H:EAX) were requested. AMD CPUID semantics * returns all zeroes for any undefined leaf, whether or not the leaf is in * range. Centaur/VIA follows Intel semantics. * * A leaf is considered out-of-range if its function is higher than the maximum * supported leaf of its associated class or if its associated class does not * exist. * * There are three primary classes to be considered, with their respective * ranges described as "<base> - <top>[,<base2> - <top2>] inclusive. A primary * class exists if a guest CPUID entry for its <base> leaf exists. For a given * class, CPUID.<base>.EAX contains the max supported leaf for the class. * * - Basic: 0x00000000 - 0x3fffffff, 0x50000000 - 0x7fffffff * - Hypervisor: 0x40000000 - 0x4fffffff * - Extended: 0x80000000 - 0xbfffffff * - Centaur: 0xc0000000 - 0xcfffffff * * The Hypervisor class is further subdivided into sub-classes that each act as * their own independent class associated with a 0x100 byte range. E.g. if Qemu * is advertising support for both HyperV and KVM, the resulting Hypervisor * CPUID sub-classes are: * * - HyperV: 0x40000000 - 0x400000ff * - KVM: 0x40000100 - 0x400001ff */ static struct kvm_cpuid_entry2 * get_out_of_range_cpuid_entry(struct kvm_vcpu *vcpu, u32 *fn_ptr, u32 index) { struct kvm_cpuid_entry2 *basic, *class; u32 function = *fn_ptr; basic = kvm_find_cpuid_entry(vcpu, 0); if (!basic) return NULL; if (is_guest_vendor_amd(basic->ebx, basic->ecx, basic->edx) || is_guest_vendor_hygon(basic->ebx, basic->ecx, basic->edx)) return NULL; if (function >= 0x40000000 && function <= 0x4fffffff) class = kvm_find_cpuid_entry(vcpu, function & 0xffffff00); else if (function >= 0xc0000000) class = kvm_find_cpuid_entry(vcpu, 0xc0000000); else class = kvm_find_cpuid_entry(vcpu, function & 0x80000000); if (class && function <= class->eax) return NULL; /* * Leaf specific adjustments are also applied when redirecting to the * max basic entry, e.g. if the max basic leaf is 0xb but there is no * entry for CPUID.0xb.index (see below), then the output value for EDX * needs to be pulled from CPUID.0xb.1. */ *fn_ptr = basic->eax; /* * The class does not exist or the requested function is out of range; * the effective CPUID entry is the max basic leaf. Note, the index of * the original requested leaf is observed! */ return kvm_find_cpuid_entry_index(vcpu, basic->eax, index); } bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool exact_only) { u32 orig_function = *eax, function = *eax, index = *ecx; struct kvm_cpuid_entry2 *entry; bool exact, used_max_basic = false; if (vcpu->arch.cpuid_dynamic_bits_dirty) kvm_update_cpuid_runtime(vcpu); entry = kvm_find_cpuid_entry_index(vcpu, function, index); exact = !!entry; if (!entry && !exact_only) { entry = get_out_of_range_cpuid_entry(vcpu, &function, index); used_max_basic = !!entry; } if (entry) { *eax = entry->eax; *ebx = entry->ebx; *ecx = entry->ecx; *edx = entry->edx; if (function == 7 && index == 0) { u64 data; if ((*ebx & (feature_bit(RTM) | feature_bit(HLE))) && !kvm_msr_read(vcpu, MSR_IA32_TSX_CTRL, &data) && (data & TSX_CTRL_CPUID_CLEAR)) *ebx &= ~(feature_bit(RTM) | feature_bit(HLE)); } else if (function == 0x80000007) { if (kvm_hv_invtsc_suppressed(vcpu)) *edx &= ~feature_bit(CONSTANT_TSC); } else if (IS_ENABLED(CONFIG_KVM_XEN) && kvm_xen_is_tsc_leaf(vcpu, function)) { /* * Update guest TSC frequency information if necessary. * Ignore failures, there is no sane value that can be * provided if KVM can't get the TSC frequency. */ if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) kvm_guest_time_update(vcpu); if (index == 1) { *ecx = vcpu->arch.pvclock_tsc_mul; *edx = vcpu->arch.pvclock_tsc_shift; } else if (index == 2) { *eax = vcpu->arch.hw_tsc_khz; } } } else { *eax = *ebx = *ecx = *edx = 0; /* * When leaf 0BH or 1FH is defined, CL is pass-through * and EDX is always the x2APIC ID, even for undefined * subleaves. Index 1 will exist iff the leaf is * implemented, so we pass through CL iff leaf 1 * exists. EDX can be copied from any existing index. */ if (function == 0xb || function == 0x1f) { entry = kvm_find_cpuid_entry_index(vcpu, function, 1); if (entry) { *ecx = index & 0xff; *edx = entry->edx; } } } trace_kvm_cpuid(orig_function, index, *eax, *ebx, *ecx, *edx, exact, used_max_basic); return exact; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_cpuid); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { u32 eax, ebx, ecx, edx; if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0)) return 1; eax = kvm_rax_read(vcpu); ecx = kvm_rcx_read(vcpu); kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, false); kvm_rax_write(vcpu, eax); kvm_rbx_write(vcpu, ebx); kvm_rcx_write(vcpu, ecx); kvm_rdx_write(vcpu, edx); return kvm_skip_emulated_instruction(vcpu); } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_cpuid);
24 19 6 36 46 47 36 19 26 24 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 // SPDX-License-Identifier: GPL-2.0-or-later /* * CTR: Counter mode * * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> */ #include <crypto/algapi.h> #include <crypto/ctr.h> #include <crypto/internal/cipher.h> #include <crypto/internal/skcipher.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> struct crypto_rfc3686_ctx { struct crypto_skcipher *child; u8 nonce[CTR_RFC3686_NONCE_SIZE]; }; struct crypto_rfc3686_req_ctx { u8 iv[CTR_RFC3686_BLOCK_SIZE]; struct skcipher_request subreq CRYPTO_MINALIGN_ATTR; }; static void crypto_ctr_crypt_final(struct skcipher_walk *walk, struct crypto_cipher *tfm) { unsigned int bsize = crypto_cipher_blocksize(tfm); unsigned long alignmask = crypto_cipher_alignmask(tfm); u8 *ctrblk = walk->iv; u8 tmp[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK]; u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1); const u8 *src = walk->src.virt.addr; u8 *dst = walk->dst.virt.addr; unsigned int nbytes = walk->nbytes; crypto_cipher_encrypt_one(tfm, keystream, ctrblk); crypto_xor_cpy(dst, keystream, src, nbytes); crypto_inc(ctrblk, bsize); } static int crypto_ctr_crypt_segment(struct skcipher_walk *walk, struct crypto_cipher *tfm) { void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = crypto_cipher_alg(tfm)->cia_encrypt; unsigned int bsize = crypto_cipher_blocksize(tfm); u8 *ctrblk = walk->iv; const u8 *src = walk->src.virt.addr; u8 *dst = walk->dst.virt.addr; unsigned int nbytes = walk->nbytes; do { /* create keystream */ fn(crypto_cipher_tfm(tfm), dst, ctrblk); crypto_xor(dst, src, bsize); /* increment counter in counterblock */ crypto_inc(ctrblk, bsize); src += bsize; dst += bsize; } while ((nbytes -= bsize) >= bsize); return nbytes; } static int crypto_ctr_crypt_inplace(struct skcipher_walk *walk, struct crypto_cipher *tfm) { void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = crypto_cipher_alg(tfm)->cia_encrypt; unsigned int bsize = crypto_cipher_blocksize(tfm); unsigned long alignmask = crypto_cipher_alignmask(tfm); unsigned int nbytes = walk->nbytes; u8 *dst = walk->dst.virt.addr; u8 *ctrblk = walk->iv; u8 tmp[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK]; u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1); do { /* create keystream */ fn(crypto_cipher_tfm(tfm), keystream, ctrblk); crypto_xor(dst, keystream, bsize); /* increment counter in counterblock */ crypto_inc(ctrblk, bsize); dst += bsize; } while ((nbytes -= bsize) >= bsize); return nbytes; } static int crypto_ctr_crypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_cipher *cipher = skcipher_cipher_simple(tfm); const unsigned int bsize = crypto_cipher_blocksize(cipher); struct skcipher_walk walk; unsigned int nbytes; int err; err = skcipher_walk_virt(&walk, req, false); while (walk.nbytes >= bsize) { if (walk.src.virt.addr == walk.dst.virt.addr) nbytes = crypto_ctr_crypt_inplace(&walk, cipher); else nbytes = crypto_ctr_crypt_segment(&walk, cipher); err = skcipher_walk_done(&walk, nbytes); } if (walk.nbytes) { crypto_ctr_crypt_final(&walk, cipher); err = skcipher_walk_done(&walk, 0); } return err; } static int crypto_ctr_create(struct crypto_template *tmpl, struct rtattr **tb) { struct skcipher_instance *inst; struct crypto_alg *alg; int err; inst = skcipher_alloc_instance_simple(tmpl, tb); if (IS_ERR(inst)) return PTR_ERR(inst); alg = skcipher_ialg_simple(inst); /* Block size must be >= 4 bytes. */ err = -EINVAL; if (alg->cra_blocksize < 4) goto out_free_inst; /* If this is false we'd fail the alignment of crypto_inc. */ if (alg->cra_blocksize % 4) goto out_free_inst; /* CTR mode is a stream cipher. */ inst->alg.base.cra_blocksize = 1; /* * To simplify the implementation, configure the skcipher walk to only * give a partial block at the very end, never earlier. */ inst->alg.chunksize = alg->cra_blocksize; inst->alg.encrypt = crypto_ctr_crypt; inst->alg.decrypt = crypto_ctr_crypt; err = skcipher_register_instance(tmpl, inst); if (err) { out_free_inst: inst->free(inst); } return err; } static int crypto_rfc3686_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { struct crypto_rfc3686_ctx *ctx = crypto_skcipher_ctx(parent); struct crypto_skcipher *child = ctx->child; /* the nonce is stored in bytes at end of key */ if (keylen < CTR_RFC3686_NONCE_SIZE) return -EINVAL; memcpy(ctx->nonce, key + (keylen - CTR_RFC3686_NONCE_SIZE), CTR_RFC3686_NONCE_SIZE); keylen -= CTR_RFC3686_NONCE_SIZE; crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & CRYPTO_TFM_REQ_MASK); return crypto_skcipher_setkey(child, key, keylen); } static int crypto_rfc3686_crypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_rfc3686_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_skcipher *child = ctx->child; unsigned long align = crypto_skcipher_alignmask(tfm); struct crypto_rfc3686_req_ctx *rctx = (void *)PTR_ALIGN((u8 *)skcipher_request_ctx(req), align + 1); struct skcipher_request *subreq = &rctx->subreq; u8 *iv = rctx->iv; /* set up counter block */ memcpy(iv, ctx->nonce, CTR_RFC3686_NONCE_SIZE); memcpy(iv + CTR_RFC3686_NONCE_SIZE, req->iv, CTR_RFC3686_IV_SIZE); /* initialize counter portion of counter block */ *(__be32 *)(iv + CTR_RFC3686_NONCE_SIZE + CTR_RFC3686_IV_SIZE) = cpu_to_be32(1); skcipher_request_set_tfm(subreq, child); skcipher_request_set_callback(subreq, req->base.flags, req->base.complete, req->base.data); skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, iv); return crypto_skcipher_encrypt(subreq); } static int crypto_rfc3686_init_tfm(struct crypto_skcipher *tfm) { struct skcipher_instance *inst = skcipher_alg_instance(tfm); struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst); struct crypto_rfc3686_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_skcipher *cipher; unsigned long align; unsigned int reqsize; cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; align = crypto_skcipher_alignmask(tfm); align &= ~(crypto_tfm_ctx_alignment() - 1); reqsize = align + sizeof(struct crypto_rfc3686_req_ctx) + crypto_skcipher_reqsize(cipher); crypto_skcipher_set_reqsize(tfm, reqsize); return 0; } static void crypto_rfc3686_exit_tfm(struct crypto_skcipher *tfm) { struct crypto_rfc3686_ctx *ctx = crypto_skcipher_ctx(tfm); crypto_free_skcipher(ctx->child); } static void crypto_rfc3686_free(struct skcipher_instance *inst) { struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst); crypto_drop_skcipher(spawn); kfree(inst); } static int crypto_rfc3686_create(struct crypto_template *tmpl, struct rtattr **tb) { struct skcipher_instance *inst; struct crypto_skcipher_spawn *spawn; struct skcipher_alg_common *alg; u32 mask; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask); if (err) return err; inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; spawn = skcipher_instance_ctx(inst); err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst), crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; alg = crypto_spawn_skcipher_alg_common(spawn); /* We only support 16-byte blocks. */ err = -EINVAL; if (alg->ivsize != CTR_RFC3686_BLOCK_SIZE) goto err_free_inst; /* Not a stream cipher? */ if (alg->base.cra_blocksize != 1) goto err_free_inst; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "rfc3686(%s)", alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "rfc3686(%s)", alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; inst->alg.base.cra_priority = alg->base.cra_priority; inst->alg.base.cra_blocksize = 1; inst->alg.base.cra_alignmask = alg->base.cra_alignmask; inst->alg.ivsize = CTR_RFC3686_IV_SIZE; inst->alg.chunksize = alg->chunksize; inst->alg.min_keysize = alg->min_keysize + CTR_RFC3686_NONCE_SIZE; inst->alg.max_keysize = alg->max_keysize + CTR_RFC3686_NONCE_SIZE; inst->alg.setkey = crypto_rfc3686_setkey; inst->alg.encrypt = crypto_rfc3686_crypt; inst->alg.decrypt = crypto_rfc3686_crypt; inst->alg.base.cra_ctxsize = sizeof(struct crypto_rfc3686_ctx); inst->alg.init = crypto_rfc3686_init_tfm; inst->alg.exit = crypto_rfc3686_exit_tfm; inst->free = crypto_rfc3686_free; err = skcipher_register_instance(tmpl, inst); if (err) { err_free_inst: crypto_rfc3686_free(inst); } return err; } static struct crypto_template crypto_ctr_tmpls[] = { { .name = "ctr", .create = crypto_ctr_create, .module = THIS_MODULE, }, { .name = "rfc3686", .create = crypto_rfc3686_create, .module = THIS_MODULE, }, }; static int __init crypto_ctr_module_init(void) { return crypto_register_templates(crypto_ctr_tmpls, ARRAY_SIZE(crypto_ctr_tmpls)); } static void __exit crypto_ctr_module_exit(void) { crypto_unregister_templates(crypto_ctr_tmpls, ARRAY_SIZE(crypto_ctr_tmpls)); } module_init(crypto_ctr_module_init); module_exit(crypto_ctr_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CTR block cipher mode of operation"); MODULE_ALIAS_CRYPTO("rfc3686"); MODULE_ALIAS_CRYPTO("ctr"); MODULE_IMPORT_NS("CRYPTO_INTERNAL");
15 11 1 3 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 // SPDX-License-Identifier: GPL-2.0-only /* * This is a module which is used for rejecting packets. */ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/ip.h> #include <linux/udp.h> #include <linux/icmp.h> #include <net/icmp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_REJECT.h> #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) #include <linux/netfilter_bridge.h> #endif #include <net/netfilter/ipv4/nf_reject.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4"); static unsigned int reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_reject_info *reject = par->targinfo; int hook = xt_hooknum(par); switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: nf_send_unreach(skb, ICMP_NET_UNREACH, hook); break; case IPT_ICMP_HOST_UNREACHABLE: nf_send_unreach(skb, ICMP_HOST_UNREACH, hook); break; case IPT_ICMP_PROT_UNREACHABLE: nf_send_unreach(skb, ICMP_PROT_UNREACH, hook); break; case IPT_ICMP_PORT_UNREACHABLE: nf_send_unreach(skb, ICMP_PORT_UNREACH, hook); break; case IPT_ICMP_NET_PROHIBITED: nf_send_unreach(skb, ICMP_NET_ANO, hook); break; case IPT_ICMP_HOST_PROHIBITED: nf_send_unreach(skb, ICMP_HOST_ANO, hook); break; case IPT_ICMP_ADMIN_PROHIBITED: nf_send_unreach(skb, ICMP_PKT_FILTERED, hook); break; case IPT_TCP_RESET: nf_send_reset(xt_net(par), par->state->sk, skb, hook); break; case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; } return NF_DROP; } static int reject_tg_check(const struct xt_tgchk_param *par) { const struct ipt_reject_info *rejinfo = par->targinfo; const struct ipt_entry *e = par->entryinfo; if (rejinfo->with == IPT_ICMP_ECHOREPLY) { pr_info_ratelimited("ECHOREPLY no longer supported.\n"); return -EINVAL; } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO)) { pr_info_ratelimited("TCP_RESET invalid for non-tcp\n"); return -EINVAL; } } return 0; } static struct xt_target reject_tg_reg __read_mostly = { .name = "REJECT", .family = NFPROTO_IPV4, .target = reject_tg, .targetsize = sizeof(struct ipt_reject_info), .table = "filter", .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT), .checkentry = reject_tg_check, .me = THIS_MODULE, }; static int __init reject_tg_init(void) { return xt_register_target(&reject_tg_reg); } static void __exit reject_tg_exit(void) { xt_unregister_target(&reject_tg_reg); } module_init(reject_tg_init); module_exit(reject_tg_exit);
8 91 4 3 20 20 1 1 1 7 7 64 2 13 49 62 62 6 6 13 2 1 36 34 17 3 3 3 9 6 6 6 17 6 1 8 11 2 13 3 3 28 28 28 28 49 49 49 3 3 3 2014 1996 20 20 2 2 1 15 6 6 6 6 6 6 50 50 4 2 4 2 4 3 2 3 3 6 1 2 2 1 10 1 1 2 4 3 8 76 2 1 2 53 20 51 4 16 2 71 61 7 1 1 2 2 1 6 2 1 1 2 1 6 6 5 4 4 3 3 10 2 1 2 5 2 3 20 1 2 17 2 3 3 7 2 6 3 5 4 3 2 2 2 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 /* * net/tipc/bearer.c: TIPC bearer code * * Copyright (c) 1996-2006, 2013-2016, Ericsson AB * Copyright (c) 2004-2006, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <net/sock.h> #include "core.h" #include "bearer.h" #include "link.h" #include "discover.h" #include "monitor.h" #include "bcast.h" #include "netlink.h" #include "udp_media.h" #include "trace.h" #include "crypto.h" #define MAX_ADDR_STR 60 static struct tipc_media * const media_info_array[] = { &eth_media_info, #ifdef CONFIG_TIPC_MEDIA_IB &ib_media_info, #endif #ifdef CONFIG_TIPC_MEDIA_UDP &udp_media_info, #endif NULL }; static struct tipc_bearer *bearer_get(struct net *net, int bearer_id) { struct tipc_net *tn = tipc_net(net); return rcu_dereference(tn->bearer_list[bearer_id]); } static void bearer_disable(struct net *net, struct tipc_bearer *b); static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); /** * tipc_media_find - locates specified media object by name * @name: name to locate */ struct tipc_media *tipc_media_find(const char *name) { u32 i; for (i = 0; media_info_array[i] != NULL; i++) { if (!strcmp(media_info_array[i]->name, name)) break; } return media_info_array[i]; } /** * media_find_id - locates specified media object by type identifier * @type: type identifier to locate */ static struct tipc_media *media_find_id(u8 type) { u32 i; for (i = 0; media_info_array[i] != NULL; i++) { if (media_info_array[i]->type_id == type) break; } return media_info_array[i]; } /** * tipc_media_addr_printf - record media address in print buffer * @buf: output buffer * @len: output buffer size remaining * @a: input media address */ int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) { char addr_str[MAX_ADDR_STR]; struct tipc_media *m; int ret; m = media_find_id(a->media_id); if (m && !m->addr2str(a, addr_str, sizeof(addr_str))) ret = scnprintf(buf, len, "%s(%s)", m->name, addr_str); else { u32 i; ret = scnprintf(buf, len, "UNKNOWN(%u)", a->media_id); for (i = 0; i < sizeof(a->value); i++) ret += scnprintf(buf + ret, len - ret, "-%x", a->value[i]); } return ret; } /** * bearer_name_validate - validate & (optionally) deconstruct bearer name * @name: ptr to bearer name string * @name_parts: ptr to area for bearer name components (or NULL if not needed) * * Return: 1 if bearer name is valid, otherwise 0. */ static int bearer_name_validate(const char *name, struct tipc_bearer_names *name_parts) { char name_copy[TIPC_MAX_BEARER_NAME]; char *media_name; char *if_name; u32 media_len; u32 if_len; /* copy bearer name & ensure length is OK */ if (strscpy(name_copy, name, TIPC_MAX_BEARER_NAME) < 0) return 0; /* ensure all component parts of bearer name are present */ media_name = name_copy; if_name = strchr(media_name, ':'); if (if_name == NULL) return 0; *(if_name++) = 0; media_len = if_name - media_name; if_len = strlen(if_name) + 1; /* validate component parts of bearer name */ if ((media_len <= 1) || (media_len > TIPC_MAX_MEDIA_NAME) || (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME)) return 0; /* return bearer name components, if necessary */ if (name_parts) { if (strscpy(name_parts->media_name, media_name, TIPC_MAX_MEDIA_NAME) < 0) return 0; if (strscpy(name_parts->if_name, if_name, TIPC_MAX_IF_NAME) < 0) return 0; } return 1; } /** * tipc_bearer_find - locates bearer object with matching bearer name * @net: the applicable net namespace * @name: bearer name to locate */ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { b = rtnl_dereference(tn->bearer_list[i]); if (b && (!strcmp(b->name, name))) return b; } return NULL; } /* tipc_bearer_get_name - get the bearer name from its id. * @net: network namespace * @name: a pointer to the buffer where the name will be stored. * @bearer_id: the id to get the name from. */ int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; if (bearer_id >= MAX_BEARERS) return -EINVAL; b = rtnl_dereference(tn->bearer_list[bearer_id]); if (!b) return -EINVAL; strcpy(name, b->name); return 0; } void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (b) tipc_disc_add_dest(b->disc); rcu_read_unlock(); } void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (b) tipc_disc_remove_dest(b->disc); rcu_read_unlock(); } /** * tipc_enable_bearer - enable bearer with the given name * @net: the applicable net namespace * @name: bearer name to enable * @disc_domain: bearer domain * @prio: bearer priority * @attr: nlattr array * @extack: netlink extended ack */ static int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, u32 prio, struct nlattr *attr[], struct netlink_ext_ack *extack) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer_names b_names; int with_this_prio = 1; struct tipc_bearer *b; struct tipc_media *m; struct sk_buff *skb; int bearer_id = 0; int res = -EINVAL; char *errstr = ""; u32 i; if (!bearer_name_validate(name, &b_names)) { NL_SET_ERR_MSG(extack, "Illegal name"); return res; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { errstr = "illegal priority"; NL_SET_ERR_MSG(extack, "Illegal priority"); goto rejected; } m = tipc_media_find(b_names.media_name); if (!m) { errstr = "media not registered"; NL_SET_ERR_MSG(extack, "Media not registered"); goto rejected; } if (prio == TIPC_MEDIA_LINK_PRI) prio = m->priority; /* Check new bearer vs existing ones and find free bearer id if any */ bearer_id = MAX_BEARERS; i = MAX_BEARERS; while (i-- != 0) { b = rtnl_dereference(tn->bearer_list[i]); if (!b) { bearer_id = i; continue; } if (!strcmp(name, b->name)) { errstr = "already enabled"; NL_SET_ERR_MSG(extack, "Already enabled"); goto rejected; } if (b->priority == prio && (++with_this_prio > 2)) { pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", name, prio); if (prio == TIPC_MIN_LINK_PRI) { errstr = "cannot adjust to lower"; NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); goto rejected; } pr_warn("Bearer <%s>: trying with adjusted priority\n", name); prio--; bearer_id = MAX_BEARERS; i = MAX_BEARERS; with_this_prio = 1; } } if (bearer_id >= MAX_BEARERS) { errstr = "max 3 bearers permitted"; NL_SET_ERR_MSG(extack, "Max 3 bearers permitted"); goto rejected; } b = kzalloc(sizeof(*b), GFP_ATOMIC); if (!b) return -ENOMEM; strscpy(b->name, name); b->media = m; res = m->enable_media(net, b, attr); if (res) { kfree(b); errstr = "failed to enable media"; NL_SET_ERR_MSG(extack, "Failed to enable media"); goto rejected; } b->identity = bearer_id; b->tolerance = m->tolerance; b->min_win = m->min_win; b->max_win = m->max_win; b->domain = disc_domain; b->net_plane = bearer_id + 'A'; b->priority = prio; refcount_set(&b->refcnt, 1); res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { bearer_disable(net, b); errstr = "failed to create discoverer"; NL_SET_ERR_MSG(extack, "Failed to create discoverer"); goto rejected; } /* Create monitoring data before accepting activate messages */ if (tipc_mon_create(net, bearer_id)) { bearer_disable(net, b); kfree_skb(skb); return -ENOMEM; } test_and_set_bit_lock(0, &b->up); rcu_assign_pointer(tn->bearer_list[bearer_id], b); if (skb) tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); pr_info("Enabled bearer <%s>, priority %u\n", name, prio); return res; rejected: pr_warn("Enabling of bearer <%s> rejected, %s\n", name, errstr); return res; } /** * tipc_reset_bearer - Reset all links established over this bearer * @net: the applicable net namespace * @b: the target bearer */ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) { pr_info("Resetting bearer <%s>\n", b->name); tipc_node_delete_links(net, b->identity); tipc_disc_reset(net, b); return 0; } bool tipc_bearer_hold(struct tipc_bearer *b) { return (b && refcount_inc_not_zero(&b->refcnt)); } void tipc_bearer_put(struct tipc_bearer *b) { if (b && refcount_dec_and_test(&b->refcnt)) kfree_rcu(b, rcu); } /** * bearer_disable - disable this bearer * @net: the applicable net namespace * @b: the bearer to disable * * Note: This routine assumes caller holds RTNL lock. */ static void bearer_disable(struct net *net, struct tipc_bearer *b) { struct tipc_net *tn = tipc_net(net); int bearer_id = b->identity; pr_info("Disabling bearer <%s>\n", b->name); clear_bit_unlock(0, &b->up); tipc_node_delete_links(net, bearer_id); b->media->disable_media(b); RCU_INIT_POINTER(b->media_ptr, NULL); if (b->disc) tipc_disc_delete(b->disc); RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL); tipc_bearer_put(b); tipc_mon_delete(net, bearer_id); } int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, struct nlattr *attr[]) { char *dev_name = strchr((const char *)b->name, ':') + 1; int hwaddr_len = b->media->hwaddr_len; u8 node_id[NODE_ID_LEN] = {0,}; struct net_device *dev; /* Find device with specified name */ dev = dev_get_by_name(net, dev_name); if (!dev) return -ENODEV; if (tipc_mtu_bad(dev)) { dev_put(dev); return -EINVAL; } if (dev == net->loopback_dev) { dev_put(dev); pr_info("Enabling <%s> not permitted\n", b->name); return -EINVAL; } /* Autoconfigure own node identity if needed */ if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) { memcpy(node_id, dev->dev_addr, hwaddr_len); tipc_net_init(net, node_id, 0); } if (!tipc_own_id(net)) { dev_put(dev); pr_warn("Failed to obtain node identity\n"); return -EINVAL; } /* Associate TIPC bearer with L2 bearer */ rcu_assign_pointer(b->media_ptr, dev); b->pt.dev = dev; b->pt.type = htons(ETH_P_TIPC); b->pt.func = tipc_l2_rcv_msg; dev_add_pack(&b->pt); memset(&b->bcast_addr, 0, sizeof(b->bcast_addr)); memcpy(b->bcast_addr.value, dev->broadcast, hwaddr_len); b->bcast_addr.media_id = b->media->type_id; b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; b->mtu = dev->mtu; b->media->raw2addr(b, &b->addr, (const char *)dev->dev_addr); rcu_assign_pointer(dev->tipc_ptr, b); return 0; } /* tipc_disable_l2_media - detach TIPC bearer from an L2 interface * @b: the target bearer * * Mark L2 bearer as inactive so that incoming buffers are thrown away */ void tipc_disable_l2_media(struct tipc_bearer *b) { struct net_device *dev; dev = (struct net_device *)rtnl_dereference(b->media_ptr); dev_remove_pack(&b->pt); RCU_INIT_POINTER(dev->tipc_ptr, NULL); synchronize_net(); dev_put(dev); } /** * tipc_l2_send_msg - send a TIPC packet out over an L2 interface * @net: the associated network namespace * @skb: the packet to be sent * @b: the bearer through which the packet is to be sent * @dest: peer destination address */ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, struct tipc_bearer *b, struct tipc_media_addr *dest) { struct net_device *dev; int delta; dev = (struct net_device *)rcu_dereference(b->media_ptr); if (!dev) return 0; delta = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); if ((delta > 0) && pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) { kfree_skb(skb); return 0; } skb_reset_network_header(skb); skb->dev = dev; skb->protocol = htons(ETH_P_TIPC); dev_hard_header(skb, dev, ETH_P_TIPC, dest->value, dev->dev_addr, skb->len); dev_queue_xmit(skb); return 0; } bool tipc_bearer_bcast_support(struct net *net, u32 bearer_id) { bool supp = false; struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (b) supp = (b->bcast_addr.broadcast == TIPC_BROADCAST_SUPPORT); rcu_read_unlock(); return supp; } int tipc_bearer_mtu(struct net *net, u32 bearer_id) { int mtu = 0; struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (b) mtu = b->mtu; rcu_read_unlock(); return mtu; } int tipc_bearer_min_mtu(struct net *net, u32 bearer_id) { int mtu = TIPC_MIN_BEARER_MTU; struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (b) mtu += b->encap_hlen; rcu_read_unlock(); return mtu; } /* tipc_bearer_xmit_skb - sends buffer to destination over bearer */ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, struct sk_buff *skb, struct tipc_media_addr *dest) { struct tipc_msg *hdr = buf_msg(skb); struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) { #ifdef CONFIG_TIPC_CRYPTO tipc_crypto_xmit(net, &skb, b, dest, NULL); if (skb) #endif b->media->send_msg(net, skb, b, dest); } else { kfree_skb(skb); } rcu_read_unlock(); } /* tipc_bearer_xmit() -send buffer to destination over bearer */ void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq, struct tipc_media_addr *dst, struct tipc_node *__dnode) { struct tipc_bearer *b; struct sk_buff *skb, *tmp; if (skb_queue_empty(xmitq)) return; rcu_read_lock(); b = bearer_get(net, bearer_id); if (unlikely(!b)) __skb_queue_purge(xmitq); skb_queue_walk_safe(xmitq, skb, tmp) { __skb_dequeue(xmitq); if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) { #ifdef CONFIG_TIPC_CRYPTO tipc_crypto_xmit(net, &skb, b, dst, __dnode); if (skb) #endif b->media->send_msg(net, skb, b, dst); } else { kfree_skb(skb); } } rcu_read_unlock(); } /* tipc_bearer_bc_xmit() - broadcast buffers to all destinations */ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq) { struct tipc_net *tn = tipc_net(net); struct tipc_media_addr *dst; int net_id = tn->net_id; struct tipc_bearer *b; struct sk_buff *skb, *tmp; struct tipc_msg *hdr; rcu_read_lock(); b = bearer_get(net, bearer_id); if (unlikely(!b || !test_bit(0, &b->up))) __skb_queue_purge(xmitq); skb_queue_walk_safe(xmitq, skb, tmp) { hdr = buf_msg(skb); msg_set_non_seq(hdr, 1); msg_set_mc_netid(hdr, net_id); __skb_dequeue(xmitq); dst = &b->bcast_addr; #ifdef CONFIG_TIPC_CRYPTO tipc_crypto_xmit(net, &skb, b, dst, NULL); if (skb) #endif b->media->send_msg(net, skb, b, dst); } rcu_read_unlock(); } /** * tipc_l2_rcv_msg - handle incoming TIPC message from an interface * @skb: the received message * @dev: the net device that the packet was received on * @pt: the packet_type structure which was used to register this handler * @orig_dev: the original receive net device in case the device is a bond * * Accept only packets explicitly sent to this node, or broadcast packets; * ignores packets sent using interface multicast, and traffic sent to other * nodes (which can happen if interface is running in promiscuous mode). */ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct tipc_bearer *b; rcu_read_lock(); b = rcu_dereference(dev->tipc_ptr) ?: rcu_dereference(orig_dev->tipc_ptr); if (likely(b && test_bit(0, &b->up) && (skb->pkt_type <= PACKET_MULTICAST))) { skb_mark_not_on_list(skb); TIPC_SKB_CB(skb)->flags = 0; tipc_rcv(dev_net(b->pt.dev), skb, b); rcu_read_unlock(); return NET_RX_SUCCESS; } rcu_read_unlock(); kfree_skb(skb); return NET_RX_DROP; } /** * tipc_l2_device_event - handle device events from network device * @nb: the context of the notification * @evt: the type of event * @ptr: the net device that the event was on * * This function is called by the Ethernet driver in case of link * change event. */ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct tipc_bearer *b; b = rtnl_dereference(dev->tipc_ptr); if (!b) return NOTIFY_DONE; trace_tipc_l2_device_event(dev, b, evt); switch (evt) { case NETDEV_CHANGE: if (netif_carrier_ok(dev) && netif_oper_up(dev)) { test_and_set_bit_lock(0, &b->up); break; } fallthrough; case NETDEV_GOING_DOWN: clear_bit_unlock(0, &b->up); tipc_reset_bearer(net, b); break; case NETDEV_UP: test_and_set_bit_lock(0, &b->up); break; case NETDEV_CHANGEMTU: if (tipc_mtu_bad(dev)) { bearer_disable(net, b); break; } b->mtu = dev->mtu; tipc_reset_bearer(net, b); break; case NETDEV_CHANGEADDR: b->media->raw2addr(b, &b->addr, (const char *)dev->dev_addr); tipc_reset_bearer(net, b); break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: bearer_disable(net, b); break; } return NOTIFY_OK; } static struct notifier_block notifier = { .notifier_call = tipc_l2_device_event, .priority = 0, }; int tipc_bearer_setup(void) { return register_netdevice_notifier(&notifier); } void tipc_bearer_cleanup(void) { unregister_netdevice_notifier(&notifier); } void tipc_bearer_stop(struct net *net) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { b = rtnl_dereference(tn->bearer_list[i]); if (b) { bearer_disable(net, b); tn->bearer_list[i] = NULL; } } } void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts) { struct net_device *dev = net->loopback_dev; struct sk_buff *skb, *_skb; int exp; skb_queue_walk(pkts, _skb) { skb = pskb_copy(_skb, GFP_ATOMIC); if (!skb) continue; exp = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); if (exp > 0 && pskb_expand_head(skb, exp, 0, GFP_ATOMIC)) { kfree_skb(skb); continue; } skb_reset_network_header(skb); dev_hard_header(skb, dev, ETH_P_TIPC, dev->dev_addr, dev->dev_addr, skb->len); skb->dev = dev; skb->pkt_type = PACKET_HOST; skb->ip_summed = CHECKSUM_UNNECESSARY; skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); } } static int tipc_loopback_rcv_pkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *od) { consume_skb(skb); return NET_RX_SUCCESS; } int tipc_attach_loopback(struct net *net) { struct net_device *dev = net->loopback_dev; struct tipc_net *tn = tipc_net(net); if (!dev) return -ENODEV; netdev_hold(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL); tn->loopback_pt.dev = dev; tn->loopback_pt.type = htons(ETH_P_TIPC); tn->loopback_pt.func = tipc_loopback_rcv_pkt; dev_add_pack(&tn->loopback_pt); return 0; } void tipc_detach_loopback(struct net *net) { struct tipc_net *tn = tipc_net(net); dev_remove_pack(&tn->loopback_pt); netdev_put(net->loopback_dev, &tn->loopback_pt.dev_tracker); } /* Caller should hold rtnl_lock to protect the bearer */ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, struct tipc_bearer *bearer, int nlflags) { void *hdr; struct nlattr *attrs; struct nlattr *prop; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, nlflags, TIPC_NL_BEARER_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_BEARER_NAME, bearer->name)) goto attr_msg_full; prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_PROP); if (!prop) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, bearer->priority)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->max_win)) goto prop_msg_full; if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu)) goto prop_msg_full; nla_nest_end(msg->skb, prop); #ifdef CONFIG_TIPC_MEDIA_UDP if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) { if (tipc_udp_nl_add_bearer_data(msg, bearer)) goto attr_msg_full; } #endif nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; prop_msg_full: nla_nest_cancel(msg->skb, prop); attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; int i = cb->args[0]; struct tipc_bearer *bearer; struct tipc_nl_msg msg; struct net *net = sock_net(skb->sk); struct tipc_net *tn = tipc_net(net); if (i == MAX_BEARERS) return 0; msg.skb = skb; msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; rtnl_lock(); for (i = 0; i < MAX_BEARERS; i++) { bearer = rtnl_dereference(tn->bearer_list[i]); if (!bearer) continue; err = __tipc_nl_add_bearer(&msg, bearer, NLM_F_MULTI); if (err) break; } rtnl_unlock(); cb->args[0] = i; return skb->len; } int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct sk_buff *rep; struct tipc_bearer *bearer; struct tipc_nl_msg msg; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!rep) return -ENOMEM; msg.skb = rep; msg.portid = info->snd_portid; msg.seq = info->snd_seq; rtnl_lock(); bearer = tipc_bearer_find(net, name); if (!bearer) { err = -EINVAL; NL_SET_ERR_MSG(info->extack, "Bearer not found"); goto err_out; } err = __tipc_nl_add_bearer(&msg, bearer, 0); if (err) goto err_out; rtnl_unlock(); return genlmsg_reply(rep, info); err_out: rtnl_unlock(); nlmsg_free(rep); return err; } int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct tipc_bearer *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); bearer = tipc_bearer_find(net, name); if (!bearer) { NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; } bearer_disable(net, bearer); return 0; } int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_bearer_disable(skb, info); rtnl_unlock(); return err; } int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { int err; char *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); u32 domain = 0; u32 prio; prio = TIPC_MEDIA_LINK_PRI; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; bearer = nla_data(attrs[TIPC_NLA_BEARER_NAME]); if (attrs[TIPC_NLA_BEARER_DOMAIN]) domain = nla_get_u32(attrs[TIPC_NLA_BEARER_DOMAIN]); if (attrs[TIPC_NLA_BEARER_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], props); if (err) return err; if (props[TIPC_NLA_PROP_PRIO]) prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); } return tipc_enable_bearer(net, bearer, domain, prio, attrs, info->extack); } int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_bearer_enable(skb, info); rtnl_unlock(); return err; } int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rtnl_lock(); b = tipc_bearer_find(net, name); if (!b) { NL_SET_ERR_MSG(info->extack, "Bearer not found"); err = -EINVAL; goto out; } #ifdef CONFIG_TIPC_MEDIA_UDP if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { NL_SET_ERR_MSG(info->extack, "UDP option is unsupported"); err = -EINVAL; goto out; } err = tipc_udp_nl_bearer_add(b, attrs[TIPC_NLA_BEARER_UDP_OPTS]); } #endif out: rtnl_unlock(); return err; } int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) { struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); char *name; int err; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); b = tipc_bearer_find(net, name); if (!b) { NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; } if (attrs[TIPC_NLA_BEARER_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], props); if (err) return err; if (props[TIPC_NLA_PROP_TOL]) { b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); tipc_node_apply_property(net, b, TIPC_NLA_PROP_TOL); } if (props[TIPC_NLA_PROP_PRIO]) b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { NL_SET_ERR_MSG(info->extack, "MTU property is unsupported"); return -EINVAL; } #ifdef CONFIG_TIPC_MEDIA_UDP if (nla_get_u32(props[TIPC_NLA_PROP_MTU]) < b->encap_hlen + TIPC_MIN_BEARER_MTU) { NL_SET_ERR_MSG(info->extack, "MTU value is out-of-range"); return -EINVAL; } b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU); #endif } } return 0; } int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_bearer_set(skb, info); rtnl_unlock(); return err; } static int __tipc_nl_add_media(struct tipc_nl_msg *msg, struct tipc_media *media, int nlflags) { void *hdr; struct nlattr *attrs; struct nlattr *prop; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, nlflags, TIPC_NL_MEDIA_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_MEDIA_NAME, media->name)) goto attr_msg_full; prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA_PROP); if (!prop) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, media->priority)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->max_win)) goto prop_msg_full; if (media->type_id == TIPC_MEDIA_TYPE_UDP) if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu)) goto prop_msg_full; nla_nest_end(msg->skb, prop); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; prop_msg_full: nla_nest_cancel(msg->skb, prop); attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; int i = cb->args[0]; struct tipc_nl_msg msg; if (i == MAX_MEDIA) return 0; msg.skb = skb; msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; rtnl_lock(); for (; media_info_array[i] != NULL; i++) { err = __tipc_nl_add_media(&msg, media_info_array[i], NLM_F_MULTI); if (err) break; } rtnl_unlock(); cb->args[0] = i; return skb->len; } int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct tipc_nl_msg msg; struct tipc_media *media; struct sk_buff *rep; struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, info->attrs[TIPC_NLA_MEDIA], tipc_nl_media_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_MEDIA_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!rep) return -ENOMEM; msg.skb = rep; msg.portid = info->snd_portid; msg.seq = info->snd_seq; rtnl_lock(); media = tipc_media_find(name); if (!media) { NL_SET_ERR_MSG(info->extack, "Media not found"); err = -EINVAL; goto err_out; } err = __tipc_nl_add_media(&msg, media, 0); if (err) goto err_out; rtnl_unlock(); return genlmsg_reply(rep, info); err_out: rtnl_unlock(); nlmsg_free(rep); return err; } int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct tipc_media *m; struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, info->attrs[TIPC_NLA_MEDIA], tipc_nl_media_policy, info->extack); if (!attrs[TIPC_NLA_MEDIA_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); m = tipc_media_find(name); if (!m) { NL_SET_ERR_MSG(info->extack, "Media not found"); return -EINVAL; } if (attrs[TIPC_NLA_MEDIA_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP], props); if (err) return err; if (props[TIPC_NLA_PROP_TOL]) m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); if (props[TIPC_NLA_PROP_PRIO]) m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { if (m->type_id != TIPC_MEDIA_TYPE_UDP) { NL_SET_ERR_MSG(info->extack, "MTU property is unsupported"); return -EINVAL; } #ifdef CONFIG_TIPC_MEDIA_UDP if (tipc_udp_mtu_bad(nla_get_u32 (props[TIPC_NLA_PROP_MTU]))) { NL_SET_ERR_MSG(info->extack, "MTU value is out-of-range"); return -EINVAL; } m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); #endif } } return 0; } int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_media_set(skb, info); rtnl_unlock(); return err; }
1 1 2 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Bluetooth HCI UART driver * * Copyright (C) 2002-2003 Fabrizio Gennari <fabrizio.gennari@philips.com> * Copyright (C) 2004-2005 Marcel Holtmann <marcel@holtmann.org> */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/types.h> #include <linux/fcntl.h> #include <linux/interrupt.h> #include <linux/ptrace.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/signal.h> #include <linux/ioctl.h> #include <linux/skbuff.h> #include <linux/bitrev.h> #include <linux/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include "hci_uart.h" static bool txcrc = true; static bool hciextn = true; #define BCSP_TXWINSIZE 4 #define BCSP_ACK_PKT 0x05 #define BCSP_LE_PKT 0x06 struct bcsp_struct { struct sk_buff_head unack; /* Unack'ed packets queue */ struct sk_buff_head rel; /* Reliable packets queue */ struct sk_buff_head unrel; /* Unreliable packets queue */ unsigned long rx_count; struct sk_buff *rx_skb; u8 rxseq_txack; /* rxseq == txack. */ u8 rxack; /* Last packet sent by us that the peer ack'ed */ struct timer_list tbcsp; struct hci_uart *hu; enum { BCSP_W4_PKT_DELIMITER, BCSP_W4_PKT_START, BCSP_W4_BCSP_HDR, BCSP_W4_DATA, BCSP_W4_CRC } rx_state; enum { BCSP_ESCSTATE_NOESC, BCSP_ESCSTATE_ESC } rx_esc_state; u8 use_crc; u16 message_crc; u8 txack_req; /* Do we need to send ack's to the peer? */ /* Reliable packet sequence number - used to assign seq to each rel pkt. */ u8 msgq_txseq; }; /* ---- BCSP CRC calculation ---- */ /* Table for calculating CRC for polynomial 0x1021, LSB processed first, * initial value 0xffff, bits shifted in reverse order. */ static const u16 crc_table[] = { 0x0000, 0x1081, 0x2102, 0x3183, 0x4204, 0x5285, 0x6306, 0x7387, 0x8408, 0x9489, 0xa50a, 0xb58b, 0xc60c, 0xd68d, 0xe70e, 0xf78f }; /* Initialise the crc calculator */ #define BCSP_CRC_INIT(x) x = 0xffff /* Update crc with next data byte * * Implementation note * The data byte is treated as two nibbles. The crc is generated * in reverse, i.e., bits are fed into the register from the top. */ static void bcsp_crc_update(u16 *crc, u8 d) { u16 reg = *crc; reg = (reg >> 4) ^ crc_table[(reg ^ d) & 0x000f]; reg = (reg >> 4) ^ crc_table[(reg ^ (d >> 4)) & 0x000f]; *crc = reg; } /* ---- BCSP core ---- */ static void bcsp_slip_msgdelim(struct sk_buff *skb) { const char pkt_delim = 0xc0; skb_put_data(skb, &pkt_delim, 1); } static void bcsp_slip_one_byte(struct sk_buff *skb, u8 c) { const char esc_c0[2] = { 0xdb, 0xdc }; const char esc_db[2] = { 0xdb, 0xdd }; switch (c) { case 0xc0: skb_put_data(skb, &esc_c0, 2); break; case 0xdb: skb_put_data(skb, &esc_db, 2); break; default: skb_put_data(skb, &c, 1); } } static int bcsp_enqueue(struct hci_uart *hu, struct sk_buff *skb) { struct bcsp_struct *bcsp = hu->priv; if (skb->len > 0xFFF) { BT_ERR("Packet too long"); kfree_skb(skb); return 0; } switch (hci_skb_pkt_type(skb)) { case HCI_ACLDATA_PKT: case HCI_COMMAND_PKT: skb_queue_tail(&bcsp->rel, skb); break; case HCI_SCODATA_PKT: skb_queue_tail(&bcsp->unrel, skb); break; default: BT_ERR("Unknown packet type"); kfree_skb(skb); break; } return 0; } static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data, int len, int pkt_type) { struct sk_buff *nskb; u8 hdr[4], chan; u16 BCSP_CRC_INIT(bcsp_txmsg_crc); int rel, i; switch (pkt_type) { case HCI_ACLDATA_PKT: chan = 6; /* BCSP ACL channel */ rel = 1; /* reliable channel */ break; case HCI_COMMAND_PKT: chan = 5; /* BCSP cmd/evt channel */ rel = 1; /* reliable channel */ break; case HCI_SCODATA_PKT: chan = 7; /* BCSP SCO channel */ rel = 0; /* unreliable channel */ break; case BCSP_LE_PKT: chan = 1; /* BCSP LE channel */ rel = 0; /* unreliable channel */ break; case BCSP_ACK_PKT: chan = 0; /* BCSP internal channel */ rel = 0; /* unreliable channel */ break; default: BT_ERR("Unknown packet type"); return NULL; } if (hciextn && chan == 5) { __le16 opcode = ((struct hci_command_hdr *)data)->opcode; /* Vendor specific commands */ if (hci_opcode_ogf(__le16_to_cpu(opcode)) == 0x3f) { u8 desc = *(data + HCI_COMMAND_HDR_SIZE); if ((desc & 0xf0) == 0xc0) { data += HCI_COMMAND_HDR_SIZE + 1; len -= HCI_COMMAND_HDR_SIZE + 1; chan = desc & 0x0f; } } } /* Max len of packet: (original len +4(bcsp hdr) +2(crc))*2 * (because bytes 0xc0 and 0xdb are escaped, worst case is * when the packet is all made of 0xc0 and 0xdb :) ) * + 2 (0xc0 delimiters at start and end). */ nskb = alloc_skb((len + 6) * 2 + 2, GFP_ATOMIC); if (!nskb) return NULL; hci_skb_pkt_type(nskb) = pkt_type; bcsp_slip_msgdelim(nskb); hdr[0] = bcsp->rxseq_txack << 3; bcsp->txack_req = 0; BT_DBG("We request packet no %u to card", bcsp->rxseq_txack); if (rel) { hdr[0] |= 0x80 + bcsp->msgq_txseq; BT_DBG("Sending packet with seqno %u", bcsp->msgq_txseq); bcsp->msgq_txseq = (bcsp->msgq_txseq + 1) & 0x07; } if (bcsp->use_crc) hdr[0] |= 0x40; hdr[1] = ((len << 4) & 0xff) | chan; hdr[2] = len >> 4; hdr[3] = ~(hdr[0] + hdr[1] + hdr[2]); /* Put BCSP header */ for (i = 0; i < 4; i++) { bcsp_slip_one_byte(nskb, hdr[i]); if (bcsp->use_crc) bcsp_crc_update(&bcsp_txmsg_crc, hdr[i]); } /* Put payload */ for (i = 0; i < len; i++) { bcsp_slip_one_byte(nskb, data[i]); if (bcsp->use_crc) bcsp_crc_update(&bcsp_txmsg_crc, data[i]); } /* Put CRC */ if (bcsp->use_crc) { bcsp_txmsg_crc = bitrev16(bcsp_txmsg_crc); bcsp_slip_one_byte(nskb, (u8)((bcsp_txmsg_crc >> 8) & 0x00ff)); bcsp_slip_one_byte(nskb, (u8)(bcsp_txmsg_crc & 0x00ff)); } bcsp_slip_msgdelim(nskb); return nskb; } /* This is a rewrite of pkt_avail in ABCSP */ static struct sk_buff *bcsp_dequeue(struct hci_uart *hu) { struct bcsp_struct *bcsp = hu->priv; unsigned long flags; struct sk_buff *skb; /* First of all, check for unreliable messages in the queue, * since they have priority */ skb = skb_dequeue(&bcsp->unrel); if (skb != NULL) { struct sk_buff *nskb; nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, hci_skb_pkt_type(skb)); if (nskb) { kfree_skb(skb); return nskb; } else { skb_queue_head(&bcsp->unrel, skb); BT_ERR("Could not dequeue pkt because alloc_skb failed"); } } /* Now, try to send a reliable pkt. We can only send a * reliable packet if the number of packets sent but not yet ack'ed * is < than the winsize */ spin_lock_irqsave_nested(&bcsp->unack.lock, flags, SINGLE_DEPTH_NESTING); if (bcsp->unack.qlen < BCSP_TXWINSIZE) { skb = skb_dequeue(&bcsp->rel); if (skb != NULL) { struct sk_buff *nskb; nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, hci_skb_pkt_type(skb)); if (nskb) { __skb_queue_tail(&bcsp->unack, skb); mod_timer(&bcsp->tbcsp, jiffies + HZ / 4); spin_unlock_irqrestore(&bcsp->unack.lock, flags); return nskb; } else { skb_queue_head(&bcsp->rel, skb); BT_ERR("Could not dequeue pkt because alloc_skb failed"); } } } spin_unlock_irqrestore(&bcsp->unack.lock, flags); /* We could not send a reliable packet, either because there are * none or because there are too many unack'ed pkts. Did we receive * any packets we have not acknowledged yet ? */ if (bcsp->txack_req) { /* if so, craft an empty ACK pkt and send it on BCSP unreliable * channel 0 */ struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, NULL, 0, BCSP_ACK_PKT); return nskb; } /* We have nothing to send */ return NULL; } static int bcsp_flush(struct hci_uart *hu) { BT_DBG("hu %p", hu); return 0; } /* Remove ack'ed packets */ static void bcsp_pkt_cull(struct bcsp_struct *bcsp) { struct sk_buff *skb, *tmp; unsigned long flags; int i, pkts_to_be_removed; u8 seqno; spin_lock_irqsave(&bcsp->unack.lock, flags); pkts_to_be_removed = skb_queue_len(&bcsp->unack); seqno = bcsp->msgq_txseq; while (pkts_to_be_removed) { if (bcsp->rxack == seqno) break; pkts_to_be_removed--; seqno = (seqno - 1) & 0x07; } if (bcsp->rxack != seqno) BT_ERR("Peer acked invalid packet"); BT_DBG("Removing %u pkts out of %u, up to seqno %u", pkts_to_be_removed, skb_queue_len(&bcsp->unack), (seqno - 1) & 0x07); i = 0; skb_queue_walk_safe(&bcsp->unack, skb, tmp) { if (i >= pkts_to_be_removed) break; i++; __skb_unlink(skb, &bcsp->unack); dev_kfree_skb_irq(skb); } if (skb_queue_empty(&bcsp->unack)) timer_delete(&bcsp->tbcsp); spin_unlock_irqrestore(&bcsp->unack.lock, flags); if (i != pkts_to_be_removed) BT_ERR("Removed only %u out of %u pkts", i, pkts_to_be_removed); } /* Handle BCSP link-establishment packets. When we * detect a "sync" packet, symptom that the BT module has reset, * we do nothing :) (yet) */ static void bcsp_handle_le_pkt(struct hci_uart *hu) { struct bcsp_struct *bcsp = hu->priv; u8 conf_pkt[4] = { 0xad, 0xef, 0xac, 0xed }; u8 conf_rsp_pkt[4] = { 0xde, 0xad, 0xd0, 0xd0 }; u8 sync_pkt[4] = { 0xda, 0xdc, 0xed, 0xed }; /* spot "conf" pkts and reply with a "conf rsp" pkt */ if (bcsp->rx_skb->data[1] >> 4 == 4 && bcsp->rx_skb->data[2] == 0 && !memcmp(&bcsp->rx_skb->data[4], conf_pkt, 4)) { struct sk_buff *nskb = alloc_skb(4, GFP_ATOMIC); BT_DBG("Found a LE conf pkt"); if (!nskb) return; skb_put_data(nskb, conf_rsp_pkt, 4); hci_skb_pkt_type(nskb) = BCSP_LE_PKT; skb_queue_head(&bcsp->unrel, nskb); hci_uart_tx_wakeup(hu); } /* Spot "sync" pkts. If we find one...disaster! */ else if (bcsp->rx_skb->data[1] >> 4 == 4 && bcsp->rx_skb->data[2] == 0 && !memcmp(&bcsp->rx_skb->data[4], sync_pkt, 4)) { BT_ERR("Found a LE sync pkt, card has reset"); } } static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char byte) { const u8 c0 = 0xc0, db = 0xdb; switch (bcsp->rx_esc_state) { case BCSP_ESCSTATE_NOESC: switch (byte) { case 0xdb: bcsp->rx_esc_state = BCSP_ESCSTATE_ESC; break; default: skb_put_data(bcsp->rx_skb, &byte, 1); if ((bcsp->rx_skb->data[0] & 0x40) != 0 && bcsp->rx_state != BCSP_W4_CRC) bcsp_crc_update(&bcsp->message_crc, byte); bcsp->rx_count--; } break; case BCSP_ESCSTATE_ESC: switch (byte) { case 0xdc: skb_put_data(bcsp->rx_skb, &c0, 1); if ((bcsp->rx_skb->data[0] & 0x40) != 0 && bcsp->rx_state != BCSP_W4_CRC) bcsp_crc_update(&bcsp->message_crc, 0xc0); bcsp->rx_esc_state = BCSP_ESCSTATE_NOESC; bcsp->rx_count--; break; case 0xdd: skb_put_data(bcsp->rx_skb, &db, 1); if ((bcsp->rx_skb->data[0] & 0x40) != 0 && bcsp->rx_state != BCSP_W4_CRC) bcsp_crc_update(&bcsp->message_crc, 0xdb); bcsp->rx_esc_state = BCSP_ESCSTATE_NOESC; bcsp->rx_count--; break; default: BT_ERR("Invalid byte %02x after esc byte", byte); kfree_skb(bcsp->rx_skb); bcsp->rx_skb = NULL; bcsp->rx_state = BCSP_W4_PKT_DELIMITER; bcsp->rx_count = 0; } } } static void bcsp_complete_rx_pkt(struct hci_uart *hu) { struct bcsp_struct *bcsp = hu->priv; int pass_up = 0; if (bcsp->rx_skb->data[0] & 0x80) { /* reliable pkt */ BT_DBG("Received seqno %u from card", bcsp->rxseq_txack); /* check the rx sequence number is as expected */ if ((bcsp->rx_skb->data[0] & 0x07) == bcsp->rxseq_txack) { bcsp->rxseq_txack++; bcsp->rxseq_txack %= 0x8; } else { /* handle re-transmitted packet or * when packet was missed */ BT_ERR("Out-of-order packet arrived, got %u expected %u", bcsp->rx_skb->data[0] & 0x07, bcsp->rxseq_txack); /* do not process out-of-order packet payload */ pass_up = 2; } /* send current txack value to all received reliable packets */ bcsp->txack_req = 1; /* If needed, transmit an ack pkt */ hci_uart_tx_wakeup(hu); } bcsp->rxack = (bcsp->rx_skb->data[0] >> 3) & 0x07; BT_DBG("Request for pkt %u from card", bcsp->rxack); /* handle received ACK indications, * including those from out-of-order packets */ bcsp_pkt_cull(bcsp); if (pass_up != 2) { if ((bcsp->rx_skb->data[1] & 0x0f) == 6 && (bcsp->rx_skb->data[0] & 0x80)) { hci_skb_pkt_type(bcsp->rx_skb) = HCI_ACLDATA_PKT; pass_up = 1; } else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 && (bcsp->rx_skb->data[0] & 0x80)) { hci_skb_pkt_type(bcsp->rx_skb) = HCI_EVENT_PKT; pass_up = 1; } else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) { hci_skb_pkt_type(bcsp->rx_skb) = HCI_SCODATA_PKT; pass_up = 1; } else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 && !(bcsp->rx_skb->data[0] & 0x80)) { bcsp_handle_le_pkt(hu); pass_up = 0; } else { pass_up = 0; } } if (pass_up == 0) { struct hci_event_hdr hdr; u8 desc = (bcsp->rx_skb->data[1] & 0x0f); if (desc != 0 && desc != 1) { if (hciextn) { desc |= 0xc0; skb_pull(bcsp->rx_skb, 4); memcpy(skb_push(bcsp->rx_skb, 1), &desc, 1); hdr.evt = 0xff; hdr.plen = bcsp->rx_skb->len; memcpy(skb_push(bcsp->rx_skb, HCI_EVENT_HDR_SIZE), &hdr, HCI_EVENT_HDR_SIZE); hci_skb_pkt_type(bcsp->rx_skb) = HCI_EVENT_PKT; hci_recv_frame(hu->hdev, bcsp->rx_skb); } else { BT_ERR("Packet for unknown channel (%u %s)", bcsp->rx_skb->data[1] & 0x0f, bcsp->rx_skb->data[0] & 0x80 ? "reliable" : "unreliable"); kfree_skb(bcsp->rx_skb); } } else kfree_skb(bcsp->rx_skb); } else if (pass_up == 1) { /* Pull out BCSP hdr */ skb_pull(bcsp->rx_skb, 4); hci_recv_frame(hu->hdev, bcsp->rx_skb); } else { /* ignore packet payload of already ACKed re-transmitted * packets or when a packet was missed in the BCSP window */ kfree_skb(bcsp->rx_skb); } bcsp->rx_state = BCSP_W4_PKT_DELIMITER; bcsp->rx_skb = NULL; } static u16 bscp_get_crc(struct bcsp_struct *bcsp) { return get_unaligned_be16(&bcsp->rx_skb->data[bcsp->rx_skb->len - 2]); } /* Recv data */ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) { struct bcsp_struct *bcsp = hu->priv; const unsigned char *ptr; if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; BT_DBG("hu %p count %d rx_state %d rx_count %ld", hu, count, bcsp->rx_state, bcsp->rx_count); ptr = data; while (count) { if (bcsp->rx_count) { if (*ptr == 0xc0) { BT_ERR("Short BCSP packet"); kfree_skb(bcsp->rx_skb); bcsp->rx_skb = NULL; bcsp->rx_state = BCSP_W4_PKT_START; bcsp->rx_count = 0; } else bcsp_unslip_one_byte(bcsp, *ptr); ptr++; count--; continue; } switch (bcsp->rx_state) { case BCSP_W4_BCSP_HDR: if ((0xff & (u8)~(bcsp->rx_skb->data[0] + bcsp->rx_skb->data[1] + bcsp->rx_skb->data[2])) != bcsp->rx_skb->data[3]) { BT_ERR("Error in BCSP hdr checksum"); kfree_skb(bcsp->rx_skb); bcsp->rx_skb = NULL; bcsp->rx_state = BCSP_W4_PKT_DELIMITER; bcsp->rx_count = 0; continue; } bcsp->rx_state = BCSP_W4_DATA; bcsp->rx_count = (bcsp->rx_skb->data[1] >> 4) + (bcsp->rx_skb->data[2] << 4); /* May be 0 */ continue; case BCSP_W4_DATA: if (bcsp->rx_skb->data[0] & 0x40) { /* pkt with crc */ bcsp->rx_state = BCSP_W4_CRC; bcsp->rx_count = 2; } else bcsp_complete_rx_pkt(hu); continue; case BCSP_W4_CRC: if (bitrev16(bcsp->message_crc) != bscp_get_crc(bcsp)) { BT_ERR("Checksum failed: computed %04x received %04x", bitrev16(bcsp->message_crc), bscp_get_crc(bcsp)); kfree_skb(bcsp->rx_skb); bcsp->rx_skb = NULL; bcsp->rx_state = BCSP_W4_PKT_DELIMITER; bcsp->rx_count = 0; continue; } skb_trim(bcsp->rx_skb, bcsp->rx_skb->len - 2); bcsp_complete_rx_pkt(hu); continue; case BCSP_W4_PKT_DELIMITER: switch (*ptr) { case 0xc0: bcsp->rx_state = BCSP_W4_PKT_START; break; default: /*BT_ERR("Ignoring byte %02x", *ptr);*/ break; } ptr++; count--; break; case BCSP_W4_PKT_START: switch (*ptr) { case 0xc0: ptr++; count--; break; default: bcsp->rx_state = BCSP_W4_BCSP_HDR; bcsp->rx_count = 4; bcsp->rx_esc_state = BCSP_ESCSTATE_NOESC; BCSP_CRC_INIT(bcsp->message_crc); /* Do not increment ptr or decrement count * Allocate packet. Max len of a BCSP pkt= * 0xFFF (payload) +4 (header) +2 (crc) */ bcsp->rx_skb = bt_skb_alloc(0x1005, GFP_ATOMIC); if (!bcsp->rx_skb) { BT_ERR("Can't allocate mem for new packet"); bcsp->rx_state = BCSP_W4_PKT_DELIMITER; bcsp->rx_count = 0; return 0; } break; } break; } } return count; } /* Arrange to retransmit all messages in the relq. */ static void bcsp_timed_event(struct timer_list *t) { struct bcsp_struct *bcsp = timer_container_of(bcsp, t, tbcsp); struct hci_uart *hu = bcsp->hu; struct sk_buff *skb; unsigned long flags; BT_DBG("hu %p retransmitting %u pkts", hu, bcsp->unack.qlen); spin_lock_irqsave_nested(&bcsp->unack.lock, flags, SINGLE_DEPTH_NESTING); while ((skb = __skb_dequeue_tail(&bcsp->unack)) != NULL) { bcsp->msgq_txseq = (bcsp->msgq_txseq - 1) & 0x07; skb_queue_head(&bcsp->rel, skb); } spin_unlock_irqrestore(&bcsp->unack.lock, flags); hci_uart_tx_wakeup(hu); } static int bcsp_open(struct hci_uart *hu) { struct bcsp_struct *bcsp; BT_DBG("hu %p", hu); bcsp = kzalloc(sizeof(*bcsp), GFP_KERNEL); if (!bcsp) return -ENOMEM; hu->priv = bcsp; bcsp->hu = hu; skb_queue_head_init(&bcsp->unack); skb_queue_head_init(&bcsp->rel); skb_queue_head_init(&bcsp->unrel); timer_setup(&bcsp->tbcsp, bcsp_timed_event, 0); bcsp->rx_state = BCSP_W4_PKT_DELIMITER; if (txcrc) bcsp->use_crc = 1; return 0; } static int bcsp_close(struct hci_uart *hu) { struct bcsp_struct *bcsp = hu->priv; timer_shutdown_sync(&bcsp->tbcsp); hu->priv = NULL; BT_DBG("hu %p", hu); skb_queue_purge(&bcsp->unack); skb_queue_purge(&bcsp->rel); skb_queue_purge(&bcsp->unrel); if (bcsp->rx_skb) { kfree_skb(bcsp->rx_skb); bcsp->rx_skb = NULL; } kfree(bcsp); return 0; } static const struct hci_uart_proto bcsp = { .id = HCI_UART_BCSP, .name = "BCSP", .open = bcsp_open, .close = bcsp_close, .enqueue = bcsp_enqueue, .dequeue = bcsp_dequeue, .recv = bcsp_recv, .flush = bcsp_flush }; int __init bcsp_init(void) { return hci_uart_register_proto(&bcsp); } int __exit bcsp_deinit(void) { return hci_uart_unregister_proto(&bcsp); } module_param(txcrc, bool, 0644); MODULE_PARM_DESC(txcrc, "Transmit CRC with every BCSP packet"); module_param(hciextn, bool, 0644); MODULE_PARM_DESC(hciextn, "Convert HCI Extensions into BCSP packets");
2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 // SPDX-License-Identifier: GPL-2.0+ /* * LEGO USB Tower driver * * Copyright (C) 2003 David Glance <davidgsf@sourceforge.net> * 2001-2004 Juergen Stuber <starblue@users.sourceforge.net> * * derived from USB Skeleton driver - 0.5 * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com) * * History: * * 2001-10-13 - 0.1 js * - first version * 2001-11-03 - 0.2 js * - simplified buffering, one-shot URBs for writing * 2001-11-10 - 0.3 js * - removed IOCTL (setting power/mode is more complicated, postponed) * 2001-11-28 - 0.4 js * - added vendor commands for mode of operation and power level in open * 2001-12-04 - 0.5 js * - set IR mode by default (by oversight 0.4 set VLL mode) * 2002-01-11 - 0.5? pcchan * - make read buffer reusable and work around bytes_to_write issue between * uhci and legusbtower * 2002-09-23 - 0.52 david (david@csse.uwa.edu.au) * - imported into lejos project * - changed wake_up to wake_up_interruptible * - changed to use lego0 rather than tower0 * - changed dbg() to use __func__ rather than deprecated __func__ * 2003-01-12 - 0.53 david (david@csse.uwa.edu.au) * - changed read and write to write everything or * timeout (from a patch by Chris Riesen and Brett Thaeler driver) * - added ioctl functionality to set timeouts * 2003-07-18 - 0.54 davidgsf (david@csse.uwa.edu.au) * - initial import into LegoUSB project * - merge of existing LegoUSB.c driver * 2003-07-18 - 0.56 davidgsf (david@csse.uwa.edu.au) * - port to 2.6 style driver * 2004-02-29 - 0.6 Juergen Stuber <starblue@users.sourceforge.net> * - fix locking * - unlink read URBs which are no longer needed * - allow increased buffer size, eliminates need for timeout on write * - have read URB running continuously * - added poll * - forbid seeking * - added nonblocking I/O * - changed back __func__ to __func__ * - read and log tower firmware version * - reset tower on probe, avoids failure of first write * 2004-03-09 - 0.7 Juergen Stuber <starblue@users.sourceforge.net> * - timeout read now only after inactivity, shorten default accordingly * 2004-03-11 - 0.8 Juergen Stuber <starblue@users.sourceforge.net> * - log major, minor instead of possibly confusing device filename * - whitespace cleanup * 2004-03-12 - 0.9 Juergen Stuber <starblue@users.sourceforge.net> * - normalize whitespace in debug messages * - take care about endianness in control message responses * 2004-03-13 - 0.91 Juergen Stuber <starblue@users.sourceforge.net> * - make default intervals longer to accommodate current EHCI driver * 2004-03-19 - 0.92 Juergen Stuber <starblue@users.sourceforge.net> * - replaced atomic_t by memory barriers * 2004-04-21 - 0.93 Juergen Stuber <starblue@users.sourceforge.net> * - wait for completion of write urb in release (needed for remotecontrol) * - corrected poll for write direction (missing negation) * 2004-04-22 - 0.94 Juergen Stuber <starblue@users.sourceforge.net> * - make device locking interruptible * 2004-04-30 - 0.95 Juergen Stuber <starblue@users.sourceforge.net> * - check for valid udev on resubmitting and unlinking urbs * 2004-08-03 - 0.96 Juergen Stuber <starblue@users.sourceforge.net> * - move reset into open to clean out spurious data */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/completion.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/poll.h> #define DRIVER_AUTHOR "Juergen Stuber <starblue@sourceforge.net>" #define DRIVER_DESC "LEGO USB Tower Driver" /* The defaults are chosen to work with the latest versions of leJOS and NQC. */ /* Some legacy software likes to receive packets in one piece. * In this case read_buffer_size should exceed the maximal packet length * (417 for datalog uploads), and packet_timeout should be set. */ static int read_buffer_size = 480; module_param(read_buffer_size, int, 0); MODULE_PARM_DESC(read_buffer_size, "Read buffer size"); /* Some legacy software likes to send packets in one piece. * In this case write_buffer_size should exceed the maximal packet length * (417 for firmware and program downloads). * A problem with long writes is that the following read may time out * if the software is not prepared to wait long enough. */ static int write_buffer_size = 480; module_param(write_buffer_size, int, 0); MODULE_PARM_DESC(write_buffer_size, "Write buffer size"); /* Some legacy software expects reads to contain whole LASM packets. * To achieve this, characters which arrive before a packet timeout * occurs will be returned in a single read operation. * A problem with long reads is that the software may time out * if it is not prepared to wait long enough. * The packet timeout should be greater than the time between the * reception of subsequent characters, which should arrive about * every 5ms for the standard 2400 baud. * Set it to 0 to disable. */ static int packet_timeout = 50; module_param(packet_timeout, int, 0); MODULE_PARM_DESC(packet_timeout, "Packet timeout in ms"); /* Some legacy software expects blocking reads to time out. * Timeout occurs after the specified time of read and write inactivity. * Set it to 0 to disable. */ static int read_timeout = 200; module_param(read_timeout, int, 0); MODULE_PARM_DESC(read_timeout, "Read timeout in ms"); /* As of kernel version 2.6.4 ehci-hcd uses an * "only one interrupt transfer per frame" shortcut * to simplify the scheduling of periodic transfers. * This conflicts with our standard 1ms intervals for in and out URBs. * We use default intervals of 2ms for in and 8ms for out transfers, * which is fast enough for 2400 baud and allows a small additional load. * Increase the interval to allow more devices that do interrupt transfers, * or set to 0 to use the standard interval from the endpoint descriptors. */ static int interrupt_in_interval = 2; module_param(interrupt_in_interval, int, 0); MODULE_PARM_DESC(interrupt_in_interval, "Interrupt in interval in ms"); static int interrupt_out_interval = 8; module_param(interrupt_out_interval, int, 0); MODULE_PARM_DESC(interrupt_out_interval, "Interrupt out interval in ms"); /* Define these values to match your device */ #define LEGO_USB_TOWER_VENDOR_ID 0x0694 #define LEGO_USB_TOWER_PRODUCT_ID 0x0001 /* Vendor requests */ #define LEGO_USB_TOWER_REQUEST_RESET 0x04 #define LEGO_USB_TOWER_REQUEST_GET_VERSION 0xFD struct tower_reset_reply { __le16 size; __u8 err_code; __u8 spare; }; struct tower_get_version_reply { __le16 size; __u8 err_code; __u8 spare; __u8 major; __u8 minor; __le16 build_no; }; /* table of devices that work with this driver */ static const struct usb_device_id tower_table[] = { { USB_DEVICE(LEGO_USB_TOWER_VENDOR_ID, LEGO_USB_TOWER_PRODUCT_ID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, tower_table); #define LEGO_USB_TOWER_MINOR_BASE 160 /* Structure to hold all of our device specific stuff */ struct lego_usb_tower { struct mutex lock; /* locks this structure */ struct usb_device *udev; /* save off the usb device pointer */ unsigned char minor; /* the starting minor number for this device */ int open_count; /* number of times this port has been opened */ unsigned long disconnected:1; char *read_buffer; size_t read_buffer_length; /* this much came in */ size_t read_packet_length; /* this much will be returned on read */ spinlock_t read_buffer_lock; int packet_timeout_jiffies; unsigned long read_last_arrival; wait_queue_head_t read_wait; wait_queue_head_t write_wait; char *interrupt_in_buffer; struct usb_endpoint_descriptor *interrupt_in_endpoint; struct urb *interrupt_in_urb; int interrupt_in_interval; int interrupt_in_done; char *interrupt_out_buffer; struct usb_endpoint_descriptor *interrupt_out_endpoint; struct urb *interrupt_out_urb; int interrupt_out_interval; int interrupt_out_busy; }; /* local function prototypes */ static ssize_t tower_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos); static ssize_t tower_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos); static inline void tower_delete(struct lego_usb_tower *dev); static int tower_open(struct inode *inode, struct file *file); static int tower_release(struct inode *inode, struct file *file); static __poll_t tower_poll(struct file *file, poll_table *wait); static loff_t tower_llseek(struct file *file, loff_t off, int whence); static void tower_check_for_read_packet(struct lego_usb_tower *dev); static void tower_interrupt_in_callback(struct urb *urb); static void tower_interrupt_out_callback(struct urb *urb); static int tower_probe(struct usb_interface *interface, const struct usb_device_id *id); static void tower_disconnect(struct usb_interface *interface); /* file operations needed when we register this driver */ static const struct file_operations tower_fops = { .owner = THIS_MODULE, .read = tower_read, .write = tower_write, .open = tower_open, .release = tower_release, .poll = tower_poll, .llseek = tower_llseek, }; static char *legousbtower_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } /* * usb class driver info in order to get a minor number from the usb core, * and to have the device registered with the driver core */ static struct usb_class_driver tower_class = { .name = "legousbtower%d", .devnode = legousbtower_devnode, .fops = &tower_fops, .minor_base = LEGO_USB_TOWER_MINOR_BASE, }; /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver tower_driver = { .name = "legousbtower", .probe = tower_probe, .disconnect = tower_disconnect, .id_table = tower_table, }; /* * lego_usb_tower_debug_data */ static inline void lego_usb_tower_debug_data(struct device *dev, const char *function, int size, const unsigned char *data) { dev_dbg(dev, "%s - length = %d, data = %*ph\n", function, size, size, data); } /* * tower_delete */ static inline void tower_delete(struct lego_usb_tower *dev) { /* free data structures */ usb_free_urb(dev->interrupt_in_urb); usb_free_urb(dev->interrupt_out_urb); kfree(dev->read_buffer); kfree(dev->interrupt_in_buffer); kfree(dev->interrupt_out_buffer); usb_put_dev(dev->udev); kfree(dev); } /* * tower_open */ static int tower_open(struct inode *inode, struct file *file) { struct lego_usb_tower *dev = NULL; int subminor; int retval = 0; struct usb_interface *interface; struct tower_reset_reply reset_reply; int result; nonseekable_open(inode, file); subminor = iminor(inode); interface = usb_find_interface(&tower_driver, subminor); if (!interface) { pr_err("error, can't find device for minor %d\n", subminor); retval = -ENODEV; goto exit; } dev = usb_get_intfdata(interface); if (!dev) { retval = -ENODEV; goto exit; } /* lock this device */ if (mutex_lock_interruptible(&dev->lock)) { retval = -ERESTARTSYS; goto exit; } /* allow opening only once */ if (dev->open_count) { retval = -EBUSY; goto unlock_exit; } /* reset the tower */ result = usb_control_msg_recv(dev->udev, 0, LEGO_USB_TOWER_REQUEST_RESET, USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_DEVICE, 0, 0, &reset_reply, sizeof(reset_reply), 1000, GFP_KERNEL); if (result < 0) { dev_err(&dev->udev->dev, "LEGO USB Tower reset control request failed\n"); retval = result; goto unlock_exit; } /* initialize in direction */ dev->read_buffer_length = 0; dev->read_packet_length = 0; usb_fill_int_urb(dev->interrupt_in_urb, dev->udev, usb_rcvintpipe(dev->udev, dev->interrupt_in_endpoint->bEndpointAddress), dev->interrupt_in_buffer, usb_endpoint_maxp(dev->interrupt_in_endpoint), tower_interrupt_in_callback, dev, dev->interrupt_in_interval); dev->interrupt_in_done = 0; mb(); retval = usb_submit_urb(dev->interrupt_in_urb, GFP_KERNEL); if (retval) { dev_err(&dev->udev->dev, "Couldn't submit interrupt_in_urb %d\n", retval); goto unlock_exit; } /* save device in the file's private structure */ file->private_data = dev; dev->open_count = 1; unlock_exit: mutex_unlock(&dev->lock); exit: return retval; } /* * tower_release */ static int tower_release(struct inode *inode, struct file *file) { struct lego_usb_tower *dev; int retval = 0; dev = file->private_data; if (dev == NULL) { retval = -ENODEV; goto exit; } mutex_lock(&dev->lock); if (dev->disconnected) { /* the device was unplugged before the file was released */ /* unlock here as tower_delete frees dev */ mutex_unlock(&dev->lock); tower_delete(dev); goto exit; } /* wait until write transfer is finished */ if (dev->interrupt_out_busy) { wait_event_interruptible_timeout(dev->write_wait, !dev->interrupt_out_busy, 2 * HZ); } /* shutdown transfers */ usb_kill_urb(dev->interrupt_in_urb); usb_kill_urb(dev->interrupt_out_urb); dev->open_count = 0; mutex_unlock(&dev->lock); exit: return retval; } /* * tower_check_for_read_packet * * To get correct semantics for signals and non-blocking I/O * with packetizing we pretend not to see any data in the read buffer * until it has been there unchanged for at least * dev->packet_timeout_jiffies, or until the buffer is full. */ static void tower_check_for_read_packet(struct lego_usb_tower *dev) { spin_lock_irq(&dev->read_buffer_lock); if (!packet_timeout || time_after(jiffies, dev->read_last_arrival + dev->packet_timeout_jiffies) || dev->read_buffer_length == read_buffer_size) { dev->read_packet_length = dev->read_buffer_length; } dev->interrupt_in_done = 0; spin_unlock_irq(&dev->read_buffer_lock); } /* * tower_poll */ static __poll_t tower_poll(struct file *file, poll_table *wait) { struct lego_usb_tower *dev; __poll_t mask = 0; dev = file->private_data; if (dev->disconnected) return EPOLLERR | EPOLLHUP; poll_wait(file, &dev->read_wait, wait); poll_wait(file, &dev->write_wait, wait); tower_check_for_read_packet(dev); if (dev->read_packet_length > 0) mask |= EPOLLIN | EPOLLRDNORM; if (!dev->interrupt_out_busy) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } /* * tower_llseek */ static loff_t tower_llseek(struct file *file, loff_t off, int whence) { return -ESPIPE; /* unseekable */ } /* * tower_read */ static ssize_t tower_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct lego_usb_tower *dev; size_t bytes_to_read; int i; int retval = 0; unsigned long timeout = 0; dev = file->private_data; /* lock this object */ if (mutex_lock_interruptible(&dev->lock)) { retval = -ERESTARTSYS; goto exit; } /* verify that the device wasn't unplugged */ if (dev->disconnected) { retval = -ENODEV; goto unlock_exit; } /* verify that we actually have some data to read */ if (count == 0) { dev_dbg(&dev->udev->dev, "read request of 0 bytes\n"); goto unlock_exit; } if (read_timeout) timeout = jiffies + msecs_to_jiffies(read_timeout); /* wait for data */ tower_check_for_read_packet(dev); while (dev->read_packet_length == 0) { if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto unlock_exit; } retval = wait_event_interruptible_timeout(dev->read_wait, dev->interrupt_in_done, dev->packet_timeout_jiffies); if (retval < 0) goto unlock_exit; /* reset read timeout during read or write activity */ if (read_timeout && (dev->read_buffer_length || dev->interrupt_out_busy)) { timeout = jiffies + msecs_to_jiffies(read_timeout); } /* check for read timeout */ if (read_timeout && time_after(jiffies, timeout)) { retval = -ETIMEDOUT; goto unlock_exit; } tower_check_for_read_packet(dev); } /* copy the data from read_buffer into userspace */ bytes_to_read = min(count, dev->read_packet_length); if (copy_to_user(buffer, dev->read_buffer, bytes_to_read)) { retval = -EFAULT; goto unlock_exit; } spin_lock_irq(&dev->read_buffer_lock); dev->read_buffer_length -= bytes_to_read; dev->read_packet_length -= bytes_to_read; for (i = 0; i < dev->read_buffer_length; i++) dev->read_buffer[i] = dev->read_buffer[i+bytes_to_read]; spin_unlock_irq(&dev->read_buffer_lock); retval = bytes_to_read; unlock_exit: /* unlock the device */ mutex_unlock(&dev->lock); exit: return retval; } /* * tower_write */ static ssize_t tower_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct lego_usb_tower *dev; size_t bytes_to_write; int retval = 0; dev = file->private_data; /* lock this object */ if (mutex_lock_interruptible(&dev->lock)) { retval = -ERESTARTSYS; goto exit; } /* verify that the device wasn't unplugged */ if (dev->disconnected) { retval = -ENODEV; goto unlock_exit; } /* verify that we actually have some data to write */ if (count == 0) { dev_dbg(&dev->udev->dev, "write request of 0 bytes\n"); goto unlock_exit; } /* wait until previous transfer is finished */ while (dev->interrupt_out_busy) { if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto unlock_exit; } retval = wait_event_interruptible(dev->write_wait, !dev->interrupt_out_busy); if (retval) goto unlock_exit; } /* write the data into interrupt_out_buffer from userspace */ bytes_to_write = min_t(int, count, write_buffer_size); dev_dbg(&dev->udev->dev, "%s: count = %zd, bytes_to_write = %zd\n", __func__, count, bytes_to_write); if (copy_from_user(dev->interrupt_out_buffer, buffer, bytes_to_write)) { retval = -EFAULT; goto unlock_exit; } /* send off the urb */ usb_fill_int_urb(dev->interrupt_out_urb, dev->udev, usb_sndintpipe(dev->udev, dev->interrupt_out_endpoint->bEndpointAddress), dev->interrupt_out_buffer, bytes_to_write, tower_interrupt_out_callback, dev, dev->interrupt_out_interval); dev->interrupt_out_busy = 1; wmb(); retval = usb_submit_urb(dev->interrupt_out_urb, GFP_KERNEL); if (retval) { dev->interrupt_out_busy = 0; dev_err(&dev->udev->dev, "Couldn't submit interrupt_out_urb %d\n", retval); goto unlock_exit; } retval = bytes_to_write; unlock_exit: /* unlock the device */ mutex_unlock(&dev->lock); exit: return retval; } /* * tower_interrupt_in_callback */ static void tower_interrupt_in_callback(struct urb *urb) { struct lego_usb_tower *dev = urb->context; int status = urb->status; int retval; unsigned long flags; lego_usb_tower_debug_data(&dev->udev->dev, __func__, urb->actual_length, urb->transfer_buffer); if (status) { if (status == -ENOENT || status == -ECONNRESET || status == -ESHUTDOWN) { goto exit; } else { dev_dbg(&dev->udev->dev, "%s: nonzero status received: %d\n", __func__, status); goto resubmit; /* maybe we can recover */ } } if (urb->actual_length > 0) { spin_lock_irqsave(&dev->read_buffer_lock, flags); if (dev->read_buffer_length + urb->actual_length < read_buffer_size) { memcpy(dev->read_buffer + dev->read_buffer_length, dev->interrupt_in_buffer, urb->actual_length); dev->read_buffer_length += urb->actual_length; dev->read_last_arrival = jiffies; dev_dbg(&dev->udev->dev, "%s: received %d bytes\n", __func__, urb->actual_length); } else { pr_warn("read_buffer overflow, %d bytes dropped\n", urb->actual_length); } spin_unlock_irqrestore(&dev->read_buffer_lock, flags); } resubmit: retval = usb_submit_urb(dev->interrupt_in_urb, GFP_ATOMIC); if (retval) { dev_err(&dev->udev->dev, "%s: usb_submit_urb failed (%d)\n", __func__, retval); } exit: dev->interrupt_in_done = 1; wake_up_interruptible(&dev->read_wait); } /* * tower_interrupt_out_callback */ static void tower_interrupt_out_callback(struct urb *urb) { struct lego_usb_tower *dev = urb->context; int status = urb->status; lego_usb_tower_debug_data(&dev->udev->dev, __func__, urb->actual_length, urb->transfer_buffer); /* sync/async unlink faults aren't errors */ if (status && !(status == -ENOENT || status == -ECONNRESET || status == -ESHUTDOWN)) { dev_dbg(&dev->udev->dev, "%s: nonzero write bulk status received: %d\n", __func__, status); } dev->interrupt_out_busy = 0; wake_up_interruptible(&dev->write_wait); } /* * tower_probe * * Called by the usb core when a new device is connected that it thinks * this driver might be interested in. */ static int tower_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct device *idev = &interface->dev; struct usb_device *udev = interface_to_usbdev(interface); struct lego_usb_tower *dev; struct tower_get_version_reply get_version_reply; int retval = -ENOMEM; int result; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) goto exit; mutex_init(&dev->lock); dev->udev = usb_get_dev(udev); spin_lock_init(&dev->read_buffer_lock); dev->packet_timeout_jiffies = msecs_to_jiffies(packet_timeout); dev->read_last_arrival = jiffies; init_waitqueue_head(&dev->read_wait); init_waitqueue_head(&dev->write_wait); result = usb_find_common_endpoints_reverse(interface->cur_altsetting, NULL, NULL, &dev->interrupt_in_endpoint, &dev->interrupt_out_endpoint); if (result) { dev_err(idev, "interrupt endpoints not found\n"); retval = result; goto error; } dev->read_buffer = kmalloc(read_buffer_size, GFP_KERNEL); if (!dev->read_buffer) goto error; dev->interrupt_in_buffer = kmalloc(usb_endpoint_maxp(dev->interrupt_in_endpoint), GFP_KERNEL); if (!dev->interrupt_in_buffer) goto error; dev->interrupt_in_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->interrupt_in_urb) goto error; dev->interrupt_out_buffer = kmalloc(write_buffer_size, GFP_KERNEL); if (!dev->interrupt_out_buffer) goto error; dev->interrupt_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->interrupt_out_urb) goto error; dev->interrupt_in_interval = interrupt_in_interval ? interrupt_in_interval : dev->interrupt_in_endpoint->bInterval; dev->interrupt_out_interval = interrupt_out_interval ? interrupt_out_interval : dev->interrupt_out_endpoint->bInterval; /* get the firmware version and log it */ result = usb_control_msg_recv(udev, 0, LEGO_USB_TOWER_REQUEST_GET_VERSION, USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_DEVICE, 0, 0, &get_version_reply, sizeof(get_version_reply), 1000, GFP_KERNEL); if (result) { dev_err(idev, "get version request failed: %d\n", result); retval = result; goto error; } dev_info(&interface->dev, "LEGO USB Tower firmware version is %d.%d build %d\n", get_version_reply.major, get_version_reply.minor, le16_to_cpu(get_version_reply.build_no)); /* we can register the device now, as it is ready */ usb_set_intfdata(interface, dev); retval = usb_register_dev(interface, &tower_class); if (retval) { /* something prevented us from registering this driver */ dev_err(idev, "Not able to get a minor for this device.\n"); goto error; } dev->minor = interface->minor; /* let the user know what node this device is now attached to */ dev_info(&interface->dev, "LEGO USB Tower #%d now attached to major " "%d minor %d\n", (dev->minor - LEGO_USB_TOWER_MINOR_BASE), USB_MAJOR, dev->minor); exit: return retval; error: tower_delete(dev); return retval; } /* * tower_disconnect * * Called by the usb core when the device is removed from the system. */ static void tower_disconnect(struct usb_interface *interface) { struct lego_usb_tower *dev; int minor; dev = usb_get_intfdata(interface); minor = dev->minor; /* give back our minor and prevent further open() */ usb_deregister_dev(interface, &tower_class); /* stop I/O */ usb_poison_urb(dev->interrupt_in_urb); usb_poison_urb(dev->interrupt_out_urb); mutex_lock(&dev->lock); /* if the device is not opened, then we clean up right now */ if (!dev->open_count) { mutex_unlock(&dev->lock); tower_delete(dev); } else { dev->disconnected = 1; /* wake up pollers */ wake_up_interruptible_all(&dev->read_wait); wake_up_interruptible_all(&dev->write_wait); mutex_unlock(&dev->lock); } dev_info(&interface->dev, "LEGO USB Tower #%d now disconnected\n", (minor - LEGO_USB_TOWER_MINOR_BASE)); } module_usb_driver(tower_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
19 14 19 2 14 16 8 14 1 14 14 14 14 4 12 12 11 12 3 5 5 5 5 5 5 5 5 5 5 5 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 /* * hw_random/core.c: HWRNG core API * * Copyright 2006 Michael Buesch <m@bues.ch> * Copyright 2005 (c) MontaVista Software, Inc. * * Please read Documentation/admin-guide/hw_random.rst for details on use. * * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. */ #include <linux/delay.h> #include <linux/device.h> #include <linux/err.h> #include <linux/fs.h> #include <linux/hw_random.h> #include <linux/kernel.h> #include <linux/kthread.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/random.h> #include <linux/sched.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/uaccess.h> #define RNG_MODULE_NAME "hw_random" #define RNG_BUFFER_SIZE (SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES) static struct hwrng *current_rng; /* the current rng has been explicitly chosen by user via sysfs */ static int cur_rng_set_by_user; static struct task_struct *hwrng_fill; /* list of registered rngs */ static LIST_HEAD(rng_list); /* Protects rng_list and current_rng */ static DEFINE_MUTEX(rng_mutex); /* Protects rng read functions, data_avail, rng_buffer and rng_fillbuf */ static DEFINE_MUTEX(reading_mutex); static int data_avail; static u8 *rng_buffer, *rng_fillbuf; static unsigned short current_quality; static unsigned short default_quality = 1024; /* default to maximum */ module_param(current_quality, ushort, 0644); MODULE_PARM_DESC(current_quality, "current hwrng entropy estimation per 1024 bits of input -- obsolete, use rng_quality instead"); module_param(default_quality, ushort, 0644); MODULE_PARM_DESC(default_quality, "default maximum entropy content of hwrng per 1024 bits of input"); static void drop_current_rng(void); static int hwrng_init(struct hwrng *rng); static int hwrng_fillfn(void *unused); static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, int wait); static size_t rng_buffer_size(void) { return RNG_BUFFER_SIZE; } static inline void cleanup_rng(struct kref *kref) { struct hwrng *rng = container_of(kref, struct hwrng, ref); if (rng->cleanup) rng->cleanup(rng); complete(&rng->cleanup_done); } static int set_current_rng(struct hwrng *rng) { int err; BUG_ON(!mutex_is_locked(&rng_mutex)); err = hwrng_init(rng); if (err) return err; drop_current_rng(); current_rng = rng; /* if necessary, start hwrng thread */ if (!hwrng_fill) { hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng"); if (IS_ERR(hwrng_fill)) { pr_err("hwrng_fill thread creation failed\n"); hwrng_fill = NULL; } } return 0; } static void drop_current_rng(void) { BUG_ON(!mutex_is_locked(&rng_mutex)); if (!current_rng) return; /* decrease last reference for triggering the cleanup */ kref_put(&current_rng->ref, cleanup_rng); current_rng = NULL; } /* Returns ERR_PTR(), NULL or refcounted hwrng */ static struct hwrng *get_current_rng_nolock(void) { if (current_rng) kref_get(&current_rng->ref); return current_rng; } static struct hwrng *get_current_rng(void) { struct hwrng *rng; if (mutex_lock_interruptible(&rng_mutex)) return ERR_PTR(-ERESTARTSYS); rng = get_current_rng_nolock(); mutex_unlock(&rng_mutex); return rng; } static void put_rng(struct hwrng *rng) { /* * Hold rng_mutex here so we serialize in case they set_current_rng * on rng again immediately. */ mutex_lock(&rng_mutex); if (rng) kref_put(&rng->ref, cleanup_rng); mutex_unlock(&rng_mutex); } static int hwrng_init(struct hwrng *rng) { if (kref_get_unless_zero(&rng->ref)) goto skip_init; if (rng->init) { int ret; ret = rng->init(rng); if (ret) return ret; } kref_init(&rng->ref); reinit_completion(&rng->cleanup_done); skip_init: current_quality = rng->quality; /* obsolete */ return 0; } static int rng_dev_open(struct inode *inode, struct file *filp) { /* enforce read-only access to this chrdev */ if ((filp->f_mode & FMODE_READ) == 0) return -EINVAL; if (filp->f_mode & FMODE_WRITE) return -EINVAL; return 0; } static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, int wait) { int present; BUG_ON(!mutex_is_locked(&reading_mutex)); if (rng->read) { int err; err = rng->read(rng, buffer, size, wait); if (WARN_ON_ONCE(err > 0 && err > size)) err = size; return err; } if (rng->data_present) present = rng->data_present(rng, wait); else present = 1; if (present) return rng->data_read(rng, (u32 *)buffer); return 0; } static ssize_t rng_dev_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) { u8 buffer[RNG_BUFFER_SIZE]; ssize_t ret = 0; int err = 0; int bytes_read, len; struct hwrng *rng; while (size) { rng = get_current_rng(); if (IS_ERR(rng)) { err = PTR_ERR(rng); goto out; } if (!rng) { err = -ENODEV; goto out; } if (mutex_lock_interruptible(&reading_mutex)) { err = -ERESTARTSYS; goto out_put; } if (!data_avail) { bytes_read = rng_get_data(rng, rng_buffer, rng_buffer_size(), !(filp->f_flags & O_NONBLOCK)); if (bytes_read < 0) { err = bytes_read; goto out_unlock_reading; } else if (bytes_read == 0 && (filp->f_flags & O_NONBLOCK)) { err = -EAGAIN; goto out_unlock_reading; } data_avail = bytes_read; } len = data_avail; if (len) { if (len > size) len = size; data_avail -= len; memcpy(buffer, rng_buffer + data_avail, len); } mutex_unlock(&reading_mutex); put_rng(rng); if (len) { if (copy_to_user(buf + ret, buffer, len)) { err = -EFAULT; goto out; } size -= len; ret += len; } if (need_resched()) schedule_timeout_interruptible(1); if (signal_pending(current)) { err = -ERESTARTSYS; goto out; } } out: memzero_explicit(buffer, sizeof(buffer)); return ret ? : err; out_unlock_reading: mutex_unlock(&reading_mutex); out_put: put_rng(rng); goto out; } static const struct file_operations rng_chrdev_ops = { .owner = THIS_MODULE, .open = rng_dev_open, .read = rng_dev_read, .llseek = noop_llseek, }; static const struct attribute_group *rng_dev_groups[]; static struct miscdevice rng_miscdev = { .minor = HWRNG_MINOR, .name = RNG_MODULE_NAME, .nodename = "hwrng", .fops = &rng_chrdev_ops, .groups = rng_dev_groups, }; static int enable_best_rng(void) { struct hwrng *rng, *new_rng = NULL; int ret = -ENODEV; BUG_ON(!mutex_is_locked(&rng_mutex)); /* no rng to use? */ if (list_empty(&rng_list)) { drop_current_rng(); cur_rng_set_by_user = 0; return 0; } /* use the rng which offers the best quality */ list_for_each_entry(rng, &rng_list, list) { if (!new_rng || rng->quality > new_rng->quality) new_rng = rng; } ret = ((new_rng == current_rng) ? 0 : set_current_rng(new_rng)); if (!ret) cur_rng_set_by_user = 0; return ret; } static ssize_t rng_current_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { int err; struct hwrng *rng, *new_rng; err = mutex_lock_interruptible(&rng_mutex); if (err) return -ERESTARTSYS; if (sysfs_streq(buf, "")) { err = enable_best_rng(); } else { list_for_each_entry(rng, &rng_list, list) { if (sysfs_streq(rng->name, buf)) { err = set_current_rng(rng); if (!err) cur_rng_set_by_user = 1; break; } } } new_rng = get_current_rng_nolock(); mutex_unlock(&rng_mutex); if (new_rng) put_rng(new_rng); return err ? : len; } static ssize_t rng_current_show(struct device *dev, struct device_attribute *attr, char *buf) { ssize_t ret; struct hwrng *rng; rng = get_current_rng(); if (IS_ERR(rng)) return PTR_ERR(rng); ret = sysfs_emit(buf, "%s\n", rng ? rng->name : "none"); put_rng(rng); return ret; } static ssize_t rng_available_show(struct device *dev, struct device_attribute *attr, char *buf) { int err; struct hwrng *rng; err = mutex_lock_interruptible(&rng_mutex); if (err) return -ERESTARTSYS; buf[0] = '\0'; list_for_each_entry(rng, &rng_list, list) { strlcat(buf, rng->name, PAGE_SIZE); strlcat(buf, " ", PAGE_SIZE); } strlcat(buf, "\n", PAGE_SIZE); mutex_unlock(&rng_mutex); return strlen(buf); } static ssize_t rng_selected_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", cur_rng_set_by_user); } static ssize_t rng_quality_show(struct device *dev, struct device_attribute *attr, char *buf) { ssize_t ret; struct hwrng *rng; rng = get_current_rng(); if (IS_ERR(rng)) return PTR_ERR(rng); if (!rng) /* no need to put_rng */ return -ENODEV; ret = sysfs_emit(buf, "%hu\n", rng->quality); put_rng(rng); return ret; } static ssize_t rng_quality_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { u16 quality; int ret = -EINVAL; if (len < 2) return -EINVAL; ret = mutex_lock_interruptible(&rng_mutex); if (ret) return -ERESTARTSYS; ret = kstrtou16(buf, 0, &quality); if (ret || quality > 1024) { ret = -EINVAL; goto out; } if (!current_rng) { ret = -ENODEV; goto out; } current_rng->quality = quality; current_quality = quality; /* obsolete */ /* the best available RNG may have changed */ ret = enable_best_rng(); out: mutex_unlock(&rng_mutex); return ret ? ret : len; } static DEVICE_ATTR_RW(rng_current); static DEVICE_ATTR_RO(rng_available); static DEVICE_ATTR_RO(rng_selected); static DEVICE_ATTR_RW(rng_quality); static struct attribute *rng_dev_attrs[] = { &dev_attr_rng_current.attr, &dev_attr_rng_available.attr, &dev_attr_rng_selected.attr, &dev_attr_rng_quality.attr, NULL }; ATTRIBUTE_GROUPS(rng_dev); static int hwrng_fillfn(void *unused) { size_t entropy, entropy_credit = 0; /* in 1/1024 of a bit */ long rc; while (!kthread_should_stop()) { unsigned short quality; struct hwrng *rng; rng = get_current_rng(); if (IS_ERR(rng) || !rng) break; mutex_lock(&reading_mutex); rc = rng_get_data(rng, rng_fillbuf, rng_buffer_size(), 1); if (current_quality != rng->quality) rng->quality = current_quality; /* obsolete */ quality = rng->quality; mutex_unlock(&reading_mutex); if (rc <= 0) hwrng_msleep(rng, 10000); put_rng(rng); if (rc <= 0) continue; /* If we cannot credit at least one bit of entropy, * keep track of the remainder for the next iteration */ entropy = rc * quality * 8 + entropy_credit; if ((entropy >> 10) == 0) entropy_credit = entropy; /* Outside lock, sure, but y'know: randomness. */ add_hwgenerator_randomness((void *)rng_fillbuf, rc, entropy >> 10, true); } hwrng_fill = NULL; return 0; } int hwrng_register(struct hwrng *rng) { int err = -EINVAL; struct hwrng *tmp; if (!rng->name || (!rng->data_read && !rng->read)) goto out; mutex_lock(&rng_mutex); /* Must not register two RNGs with the same name. */ err = -EEXIST; list_for_each_entry(tmp, &rng_list, list) { if (strcmp(tmp->name, rng->name) == 0) goto out_unlock; } list_add_tail(&rng->list, &rng_list); init_completion(&rng->cleanup_done); complete(&rng->cleanup_done); init_completion(&rng->dying); /* Adjust quality field to always have a proper value */ rng->quality = min_t(u16, min_t(u16, default_quality, 1024), rng->quality ?: 1024); if (!current_rng || (!cur_rng_set_by_user && rng->quality > current_rng->quality)) { /* * Set new rng as current as the new rng source * provides better entropy quality and was not * chosen by userspace. */ err = set_current_rng(rng); if (err) goto out_unlock; } mutex_unlock(&rng_mutex); return 0; out_unlock: mutex_unlock(&rng_mutex); out: return err; } EXPORT_SYMBOL_GPL(hwrng_register); void hwrng_unregister(struct hwrng *rng) { struct hwrng *new_rng; int err; mutex_lock(&rng_mutex); list_del(&rng->list); complete_all(&rng->dying); if (current_rng == rng) { err = enable_best_rng(); if (err) { drop_current_rng(); cur_rng_set_by_user = 0; } } new_rng = get_current_rng_nolock(); if (list_empty(&rng_list)) { mutex_unlock(&rng_mutex); if (hwrng_fill) kthread_stop(hwrng_fill); } else mutex_unlock(&rng_mutex); if (new_rng) put_rng(new_rng); wait_for_completion(&rng->cleanup_done); } EXPORT_SYMBOL_GPL(hwrng_unregister); static void devm_hwrng_release(struct device *dev, void *res) { hwrng_unregister(*(struct hwrng **)res); } static int devm_hwrng_match(struct device *dev, void *res, void *data) { struct hwrng **r = res; if (WARN_ON(!r || !*r)) return 0; return *r == data; } int devm_hwrng_register(struct device *dev, struct hwrng *rng) { struct hwrng **ptr; int error; ptr = devres_alloc(devm_hwrng_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return -ENOMEM; error = hwrng_register(rng); if (error) { devres_free(ptr); return error; } *ptr = rng; devres_add(dev, ptr); return 0; } EXPORT_SYMBOL_GPL(devm_hwrng_register); void devm_hwrng_unregister(struct device *dev, struct hwrng *rng) { devres_release(dev, devm_hwrng_release, devm_hwrng_match, rng); } EXPORT_SYMBOL_GPL(devm_hwrng_unregister); long hwrng_msleep(struct hwrng *rng, unsigned int msecs) { unsigned long timeout = msecs_to_jiffies(msecs) + 1; return wait_for_completion_interruptible_timeout(&rng->dying, timeout); } EXPORT_SYMBOL_GPL(hwrng_msleep); long hwrng_yield(struct hwrng *rng) { return wait_for_completion_interruptible_timeout(&rng->dying, 1); } EXPORT_SYMBOL_GPL(hwrng_yield); static int __init hwrng_modinit(void) { int ret; /* kmalloc makes this safe for virt_to_page() in virtio_rng.c */ rng_buffer = kmalloc(rng_buffer_size(), GFP_KERNEL); if (!rng_buffer) return -ENOMEM; rng_fillbuf = kmalloc(rng_buffer_size(), GFP_KERNEL); if (!rng_fillbuf) { kfree(rng_buffer); return -ENOMEM; } ret = misc_register(&rng_miscdev); if (ret) { kfree(rng_fillbuf); kfree(rng_buffer); } return ret; } static void __exit hwrng_modexit(void) { mutex_lock(&rng_mutex); BUG_ON(current_rng); kfree(rng_buffer); kfree(rng_fillbuf); mutex_unlock(&rng_mutex); misc_deregister(&rng_miscdev); } fs_initcall(hwrng_modinit); /* depends on misc_register() */ module_exit(hwrng_modexit); MODULE_DESCRIPTION("H/W Random Number Generator (RNG) driver"); MODULE_LICENSE("GPL");
55 54 55 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 // SPDX-License-Identifier: GPL-2.0-only /* * HT handling * * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH * Copyright (C) 2018 - 2024 Intel Corporation */ #include <linux/ieee80211.h> #include <linux/slab.h> #include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "wme.h" /** * DOC: TX A-MPDU aggregation * * Aggregation on the TX side requires setting the hardware flag * %IEEE80211_HW_AMPDU_AGGREGATION. The driver will then be handed * packets with a flag indicating A-MPDU aggregation. The driver * or device is responsible for actually aggregating the frames, * as well as deciding how many and which to aggregate. * * When TX aggregation is started by some subsystem (usually the rate * control algorithm would be appropriate) by calling the * ieee80211_start_tx_ba_session() function, the driver will be * notified via its @ampdu_action function, with the * %IEEE80211_AMPDU_TX_START action. * * In response to that, the driver is later required to call the * ieee80211_start_tx_ba_cb_irqsafe() function, which will really * start the aggregation session after the peer has also responded. * If the peer responds negatively, the session will be stopped * again right away. Note that it is possible for the aggregation * session to be stopped before the driver has indicated that it * is done setting it up, in which case it must not indicate the * setup completion. * * Also note that, since we also need to wait for a response from * the peer, the driver is notified of the completion of the * handshake by the %IEEE80211_AMPDU_TX_OPERATIONAL action to the * @ampdu_action callback. * * Similarly, when the aggregation session is stopped by the peer * or something calling ieee80211_stop_tx_ba_session(), the driver's * @ampdu_action function will be called with the action * %IEEE80211_AMPDU_TX_STOP. In this case, the call must not fail, * and the driver must later call ieee80211_stop_tx_ba_cb_irqsafe(). * Note that the sta can get destroyed before the BA tear down is * complete. */ static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid, u8 dialog_token, u16 start_seq_num, u16 agg_size, u16 timeout) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u16 capab; skb = dev_alloc_skb(sizeof(*mgmt) + 2 + sizeof(struct ieee80211_addba_ext_ie) + local->hw.extra_tx_headroom); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = ieee80211_mgmt_ba(skb, sta->sta.addr, sdata); skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req)); mgmt->u.action.category = WLAN_CATEGORY_BACK; mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ; mgmt->u.action.u.addba_req.dialog_token = dialog_token; capab = IEEE80211_ADDBA_PARAM_AMSDU_MASK; capab |= IEEE80211_ADDBA_PARAM_POLICY_MASK; capab |= u16_encode_bits(tid, IEEE80211_ADDBA_PARAM_TID_MASK); capab |= u16_encode_bits(agg_size, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK); mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab); mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout); mgmt->u.action.u.addba_req.start_seq_num = cpu_to_le16(start_seq_num << 4); if (sta->sta.deflink.he_cap.has_he) ieee80211_add_addbaext(skb, 0, agg_size); ieee80211_tx_skb_tid(sdata, skb, tid, -1); } void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_bar *bar; u16 bar_control = 0; skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); bar = skb_put_zero(skb, sizeof(*bar)); bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK_REQ); memcpy(bar->ra, ra, ETH_ALEN); memcpy(bar->ta, sdata->vif.addr, ETH_ALEN); bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL; bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA; bar_control |= (u16)(tid << IEEE80211_BAR_CTRL_TID_INFO_SHIFT); bar->control = cpu_to_le16(bar_control); bar->start_seq_num = cpu_to_le16(ssn); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | IEEE80211_TX_CTL_REQ_TX_STATUS; ieee80211_tx_skb_tid(sdata, skb, tid, -1); } EXPORT_SYMBOL(ieee80211_send_bar); void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx) { lockdep_assert_wiphy(sta->local->hw.wiphy); lockdep_assert_held(&sta->lock); rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); } /* * When multiple aggregation sessions on multiple stations * are being created/destroyed simultaneously, we need to * refcount the global queue stop caused by that in order * to not get into a situation where one of the aggregation * setup or teardown re-enables queues before the other is * ready to handle that. * * These two functions take care of this issue by keeping * a global "agg_queue_stop" refcount. */ static void __acquires(agg_queue) ieee80211_stop_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) { int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; /* we do refcounting here, so don't use the queue reason refcounting */ if (atomic_inc_return(&sdata->local->agg_queue_stop[queue]) == 1) ieee80211_stop_queue_by_reason( &sdata->local->hw, queue, IEEE80211_QUEUE_STOP_REASON_AGGREGATION, false); __acquire(agg_queue); } static void __releases(agg_queue) ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) { int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; if (atomic_dec_return(&sdata->local->agg_queue_stop[queue]) == 0) ieee80211_wake_queue_by_reason( &sdata->local->hw, queue, IEEE80211_QUEUE_STOP_REASON_AGGREGATION, false); __release(agg_queue); } static void ieee80211_agg_stop_txq(struct sta_info *sta, int tid) { struct ieee80211_txq *txq = sta->sta.txq[tid]; struct ieee80211_sub_if_data *sdata; struct fq *fq; struct txq_info *txqi; if (!txq) return; txqi = to_txq_info(txq); sdata = vif_to_sdata(txq->vif); fq = &sdata->local->fq; /* Lock here to protect against further seqno updates on dequeue */ spin_lock_bh(&fq->lock); set_bit(IEEE80211_TXQ_STOP, &txqi->flags); spin_unlock_bh(&fq->lock); } static void ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable) { struct ieee80211_txq *txq = sta->sta.txq[tid]; struct txq_info *txqi; lockdep_assert_wiphy(sta->local->hw.wiphy); if (!txq) return; txqi = to_txq_info(txq); if (enable) set_bit(IEEE80211_TXQ_AMPDU, &txqi->flags); else clear_bit(IEEE80211_TXQ_AMPDU, &txqi->flags); clear_bit(IEEE80211_TXQ_STOP, &txqi->flags); local_bh_disable(); rcu_read_lock(); schedule_and_wake_txq(sta->sdata->local, txqi); rcu_read_unlock(); local_bh_enable(); } /* * splice packets from the STA's pending to the local pending, * requires a call to ieee80211_agg_splice_finish later */ static void __acquires(agg_queue) ieee80211_agg_splice_packets(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_tx *tid_tx, u16 tid) { struct ieee80211_local *local = sdata->local; int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; unsigned long flags; ieee80211_stop_queue_agg(sdata, tid); if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates from the pending queue\n", tid)) return; if (!skb_queue_empty(&tid_tx->pending)) { spin_lock_irqsave(&local->queue_stop_reason_lock, flags); /* copy over remaining packets */ skb_queue_splice_tail_init(&tid_tx->pending, &local->pending[queue]); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } } static void __releases(agg_queue) ieee80211_agg_splice_finish(struct ieee80211_sub_if_data *sdata, u16 tid) { ieee80211_wake_queue_agg(sdata, tid); } static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; lockdep_assert_wiphy(sta->local->hw.wiphy); lockdep_assert_held(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); /* * When we get here, the TX path will not be lockless any more wrt. * aggregation, since the OPERATIONAL bit has long been cleared. * Thus it will block on getting the lock, if it occurs. So if we * stop the queue now, we will not get any more packets, and any * that might be being processed will wait for us here, thereby * guaranteeing that no packets go to the tid_tx pending queue any * more. */ ieee80211_agg_splice_packets(sta->sdata, tid_tx, tid); /* future packets must not find the tid_tx struct any more */ ieee80211_assign_tid_tx(sta, tid, NULL); ieee80211_agg_splice_finish(sta->sdata, tid); kfree_rcu(tid_tx, rcu_head); } int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_agg_stop_reason reason) { struct ieee80211_local *local = sta->local; struct tid_ampdu_tx *tid_tx; struct ieee80211_ampdu_params params = { .sta = &sta->sta, .tid = tid, .buf_size = 0, .amsdu = false, .timeout = 0, .ssn = 0, }; int ret; lockdep_assert_wiphy(sta->local->hw.wiphy); switch (reason) { case AGG_STOP_DECLINED: case AGG_STOP_LOCAL_REQUEST: case AGG_STOP_PEER_REQUEST: params.action = IEEE80211_AMPDU_TX_STOP_CONT; break; case AGG_STOP_DESTROY_STA: params.action = IEEE80211_AMPDU_TX_STOP_FLUSH; break; default: WARN_ON_ONCE(1); return -EINVAL; } spin_lock_bh(&sta->lock); /* free struct pending for start, if present */ tid_tx = sta->ampdu_mlme.tid_start_tx[tid]; kfree(tid_tx); sta->ampdu_mlme.tid_start_tx[tid] = NULL; tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx) { spin_unlock_bh(&sta->lock); return -ENOENT; } /* * if we're already stopping ignore any new requests to stop * unless we're destroying it in which case notify the driver */ if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { spin_unlock_bh(&sta->lock); if (reason != AGG_STOP_DESTROY_STA) return -EALREADY; params.action = IEEE80211_AMPDU_TX_STOP_FLUSH_CONT; ret = drv_ampdu_action(local, sta->sdata, &params); WARN_ON_ONCE(ret); return 0; } if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { /* not even started yet! */ ieee80211_assign_tid_tx(sta, tid, NULL); spin_unlock_bh(&sta->lock); kfree_rcu(tid_tx, rcu_head); return 0; } set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); ieee80211_agg_stop_txq(sta, tid); spin_unlock_bh(&sta->lock); ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n", sta->sta.addr, tid); timer_delete_sync(&tid_tx->addba_resp_timer); timer_delete_sync(&tid_tx->session_timer); /* * After this packets are no longer handed right through * to the driver but are put onto tid_tx->pending instead, * with locking to ensure proper access. */ clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state); /* * There might be a few packets being processed right now (on * another CPU) that have already gotten past the aggregation * check when it was still OPERATIONAL and consequently have * IEEE80211_TX_CTL_AMPDU set. In that case, this code might * call into the driver at the same time or even before the * TX paths calls into it, which could confuse the driver. * * Wait for all currently running TX paths to finish before * telling the driver. New packets will not go through since * the aggregation session is no longer OPERATIONAL. */ if (!local->in_reconfig) synchronize_net(); tid_tx->stop_initiator = reason == AGG_STOP_PEER_REQUEST ? WLAN_BACK_RECIPIENT : WLAN_BACK_INITIATOR; tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST; ret = drv_ampdu_action(local, sta->sdata, &params); /* HW shall not deny going back to legacy */ if (WARN_ON(ret)) { /* * We may have pending packets get stuck in this case... * Not bothering with a workaround for now. */ } /* * In the case of AGG_STOP_DESTROY_STA, the driver won't * necessarily call ieee80211_stop_tx_ba_cb(), so this may * seem like we can leave the tid_tx data pending forever. * This is true, in a way, but "forever" is only until the * station struct is actually destroyed. In the meantime, * leaving it around ensures that we don't transmit packets * to the driver on this TID which might confuse it. */ return 0; } /* * After sending add Block Ack request we activated a timer until * add Block Ack response will arrive from the recipient. * If this timer expires sta_addba_resp_timer_expired will be executed. */ static void sta_addba_resp_timer_expired(struct timer_list *t) { struct tid_ampdu_tx *tid_tx = timer_container_of(tid_tx, t, addba_resp_timer); struct sta_info *sta = tid_tx->sta; u8 tid = tid_tx->tid; /* check if the TID waits for addBA response */ if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { ht_dbg(sta->sdata, "timer expired on %pM tid %d not expecting addBA response\n", sta->sta.addr, tid); return; } ht_dbg(sta->sdata, "addBA response timer expired on %pM tid %d\n", sta->sta.addr, tid); ieee80211_stop_tx_ba_session(&sta->sta, tid); } static void ieee80211_send_addba_with_timeout(struct sta_info *sta, struct tid_ampdu_tx *tid_tx) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sta->local; u8 tid = tid_tx->tid; u16 buf_size; if (WARN_ON_ONCE(test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state) || test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state))) return; lockdep_assert_wiphy(sta->local->hw.wiphy); /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n", sta->sta.addr, tid); spin_lock_bh(&sta->lock); sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; sta->ampdu_mlme.addba_req_num[tid]++; spin_unlock_bh(&sta->lock); if (sta->sta.valid_links || sta->sta.deflink.eht_cap.has_eht || ieee80211_hw_check(&local->hw, STRICT)) { buf_size = local->hw.max_tx_aggregation_subframes; } else if (sta->sta.deflink.he_cap.has_he) { buf_size = min_t(u16, local->hw.max_tx_aggregation_subframes, IEEE80211_MAX_AMPDU_BUF_HE); } else { /* * We really should use what the driver told us it will * transmit as the maximum, but certain APs (e.g. the * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012) * will crash when we use a lower number. */ buf_size = IEEE80211_MAX_AMPDU_BUF_HT; } /* send AddBA request */ ieee80211_send_addba_request(sta, tid, tid_tx->dialog_token, tid_tx->ssn, buf_size, tid_tx->timeout); WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)); } void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_ampdu_params params = { .sta = &sta->sta, .action = IEEE80211_AMPDU_TX_START, .tid = tid, .buf_size = 0, .amsdu = false, .timeout = 0, }; int ret; tid_tx = rcu_dereference_protected_tid_tx(sta, tid); /* * Start queuing up packets for this aggregation session. * We're going to release them once the driver is OK with * that. */ clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); /* * Make sure no packets are being processed. This ensures that * we have a valid starting sequence number and that in-flight * packets have been flushed out and no packets for this TID * will go into the driver during the ampdu_action call. */ synchronize_net(); params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, &params); tid_tx->ssn = params.ssn; if (ret == IEEE80211_AMPDU_TX_START_DELAY_ADDBA) { return; } else if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) { /* * We didn't send the request yet, so don't need to check * here if we already got a response, just mark as driver * ready immediately. */ set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state); } else if (ret) { ht_dbg(sdata, "BA request denied - HW unavailable for %pM tid %d\n", sta->sta.addr, tid); spin_lock_bh(&sta->lock); ieee80211_agg_splice_packets(sdata, tid_tx, tid); ieee80211_assign_tid_tx(sta, tid, NULL); ieee80211_agg_splice_finish(sdata, tid); spin_unlock_bh(&sta->lock); ieee80211_agg_start_txq(sta, tid, false); kfree_rcu(tid_tx, rcu_head); return; } ieee80211_send_addba_with_timeout(sta, tid_tx); } void ieee80211_refresh_tx_agg_session_timer(struct ieee80211_sta *pubsta, u16 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct tid_ampdu_tx *tid_tx; if (WARN_ON_ONCE(tid >= IEEE80211_NUM_TIDS)) return; tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); if (!tid_tx) return; tid_tx->last_tx = jiffies; } EXPORT_SYMBOL(ieee80211_refresh_tx_agg_session_timer); /* * After accepting the AddBA Response we activated a timer, * resetting it after each frame that we send. */ static void sta_tx_agg_session_timer_expired(struct timer_list *t) { struct tid_ampdu_tx *tid_tx = timer_container_of(tid_tx, t, session_timer); struct sta_info *sta = tid_tx->sta; u8 tid = tid_tx->tid; unsigned long timeout; if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { return; } timeout = tid_tx->last_tx + TU_TO_JIFFIES(tid_tx->timeout); if (time_is_after_jiffies(timeout)) { mod_timer(&tid_tx->session_timer, timeout); return; } ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n", sta->sta.addr, tid); ieee80211_stop_tx_ba_session(&sta->sta, tid); } int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, u16 timeout) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct tid_ampdu_tx *tid_tx; int ret = 0; trace_api_start_tx_ba_session(pubsta, tid); if (WARN(sta->reserved_tid == tid, "Requested to start BA session on reserved tid=%d", tid)) return -EINVAL; if (!pubsta->valid_links && !pubsta->deflink.ht_cap.ht_supported && !pubsta->deflink.vht_cap.vht_supported && !pubsta->deflink.he_cap.has_he && !pubsta->deflink.eht_cap.has_eht && !pubsta->deflink.s1g_cap.s1g) return -EINVAL; if (WARN_ON_ONCE(!local->ops->ampdu_action)) return -EINVAL; if ((tid >= IEEE80211_NUM_TIDS) || !ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION) || ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) return -EINVAL; if (WARN_ON(tid >= IEEE80211_FIRST_TSPEC_TSID)) return -EINVAL; ht_dbg(sdata, "Open BA session requested for %pM tid %u\n", pubsta->addr, tid); if (sdata->vif.type != NL80211_IFTYPE_STATION && sdata->vif.type != NL80211_IFTYPE_MESH_POINT && sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_AP && sdata->vif.type != NL80211_IFTYPE_ADHOC) return -EINVAL; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { ht_dbg(sdata, "BA sessions blocked - Denying BA session request %pM tid %d\n", sta->sta.addr, tid); return -EINVAL; } if (test_sta_flag(sta, WLAN_STA_MFP) && !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { ht_dbg(sdata, "MFP STA not authorized - deny BA session request %pM tid %d\n", sta->sta.addr, tid); return -EINVAL; } /* * 802.11n-2009 11.5.1.1: If the initiating STA is an HT STA, is a * member of an IBSS, and has no other existing Block Ack agreement * with the recipient STA, then the initiating STA shall transmit a * Probe Request frame to the recipient STA and shall not transmit an * ADDBA Request frame unless it receives a Probe Response frame * from the recipient within dot11ADDBAFailureTimeout. * * The probe request mechanism for ADDBA is currently not implemented, * but we only build up Block Ack session with HT STAs. This information * is set when we receive a bss info from a probe response or a beacon. */ if (sta->sdata->vif.type == NL80211_IFTYPE_ADHOC && !sta->sta.deflink.ht_cap.ht_supported) { ht_dbg(sdata, "BA request denied - IBSS STA %pM does not advertise HT support\n", pubsta->addr); return -EINVAL; } spin_lock_bh(&sta->lock); /* we have tried too many times, receiver does not want A-MPDU */ if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { ret = -EBUSY; goto err_unlock_sta; } /* * if we have tried more than HT_AGG_BURST_RETRIES times we * will spread our requests in time to avoid stalling connection * for too long */ if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_BURST_RETRIES && time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] + HT_AGG_RETRIES_PERIOD)) { ht_dbg(sdata, "BA request denied - %d failed requests on %pM tid %u\n", sta->ampdu_mlme.addba_req_num[tid], sta->sta.addr, tid); ret = -EBUSY; goto err_unlock_sta; } tid_tx = rcu_dereference_protected_tid_tx(sta, tid); /* check if the TID is not in aggregation flow already */ if (tid_tx || sta->ampdu_mlme.tid_start_tx[tid]) { ht_dbg(sdata, "BA request denied - session is not idle on %pM tid %u\n", sta->sta.addr, tid); ret = -EAGAIN; goto err_unlock_sta; } /* prepare A-MPDU MLME for Tx aggregation */ tid_tx = kzalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); if (!tid_tx) { ret = -ENOMEM; goto err_unlock_sta; } skb_queue_head_init(&tid_tx->pending); __set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); tid_tx->timeout = timeout; tid_tx->sta = sta; tid_tx->tid = tid; /* response timer */ timer_setup(&tid_tx->addba_resp_timer, sta_addba_resp_timer_expired, 0); /* tx timer */ timer_setup(&tid_tx->session_timer, sta_tx_agg_session_timer_expired, TIMER_DEFERRABLE); /* assign a dialog token */ sta->ampdu_mlme.dialog_token_allocator++; tid_tx->dialog_token = sta->ampdu_mlme.dialog_token_allocator; /* * Finally, assign it to the start array; the work item will * collect it and move it to the normal array. */ sta->ampdu_mlme.tid_start_tx[tid] = tid_tx; wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work); /* this flow continues off the work */ err_unlock_sta: spin_unlock_bh(&sta->lock); return ret; } EXPORT_SYMBOL(ieee80211_start_tx_ba_session); static void ieee80211_agg_tx_operational(struct ieee80211_local *local, struct sta_info *sta, u16 tid) { struct tid_ampdu_tx *tid_tx; struct ieee80211_ampdu_params params = { .sta = &sta->sta, .action = IEEE80211_AMPDU_TX_OPERATIONAL, .tid = tid, .timeout = 0, .ssn = 0, }; lockdep_assert_wiphy(sta->local->hw.wiphy); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); params.buf_size = tid_tx->buf_size; params.amsdu = tid_tx->amsdu; ht_dbg(sta->sdata, "Aggregation is on for %pM tid %d\n", sta->sta.addr, tid); drv_ampdu_action(local, sta->sdata, &params); /* * synchronize with TX path, while splicing the TX path * should block so it won't put more packets onto pending. */ spin_lock_bh(&sta->lock); ieee80211_agg_splice_packets(sta->sdata, tid_tx, tid); /* * Now mark as operational. This will be visible * in the TX path, and lets it go lock-free in * the common case. */ set_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state); ieee80211_agg_splice_finish(sta->sdata, tid); spin_unlock_bh(&sta->lock); ieee80211_agg_start_txq(sta, tid, true); } void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(sta->local->hw.wiphy); if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))) return; if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state) || test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state)) return; if (!test_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)) { ieee80211_send_addba_with_timeout(sta, tid_tx); /* RESPONSE_RECEIVED state would trigger the flow again */ return; } if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); } static struct tid_ampdu_tx * ieee80211_lookup_tid_tx(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid, struct sta_info **sta) { struct tid_ampdu_tx *tid_tx; if (tid >= IEEE80211_NUM_TIDS) { ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n", tid, IEEE80211_NUM_TIDS); return NULL; } *sta = sta_info_get_bss(sdata, ra); if (!*sta) { ht_dbg(sdata, "Could not find station: %pM\n", ra); return NULL; } tid_tx = rcu_dereference((*sta)->ampdu_mlme.tid_tx[tid]); if (WARN_ON(!tid_tx)) ht_dbg(sdata, "addBA was not requested!\n"); return tid_tx; } void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct tid_ampdu_tx *tid_tx; trace_api_start_tx_ba_cb(sdata, ra, tid); rcu_read_lock(); tid_tx = ieee80211_lookup_tid_tx(sdata, ra, tid, &sta); if (!tid_tx) goto out; set_bit(HT_AGG_STATE_START_CB, &tid_tx->state); wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work); out: rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct tid_ampdu_tx *tid_tx; int ret = 0; trace_api_stop_tx_ba_session(pubsta, tid); if (!local->ops->ampdu_action) return -EINVAL; if (tid >= IEEE80211_NUM_TIDS) return -EINVAL; spin_lock_bh(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx) { ret = -ENOENT; goto unlock; } WARN(sta->reserved_tid == tid, "Requested to stop BA session on reserved tid=%d", tid); if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { /* already in progress stopping it */ ret = 0; goto unlock; } set_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state); wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work); unlock: spin_unlock_bh(&sta->lock); return ret; } EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx) { struct ieee80211_sub_if_data *sdata = sta->sdata; bool send_delba = false; bool start_txq = false; ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", sta->sta.addr, tid); spin_lock_bh(&sta->lock); if (!test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { ht_dbg(sdata, "unexpected callback to A-MPDU stop for %pM tid %d\n", sta->sta.addr, tid); goto unlock_sta; } if (tid_tx->stop_initiator == WLAN_BACK_INITIATOR && tid_tx->tx_stop) send_delba = true; ieee80211_remove_tid_tx(sta, tid); start_txq = true; unlock_sta: spin_unlock_bh(&sta->lock); if (start_txq) ieee80211_agg_start_txq(sta, tid, false); if (send_delba) ieee80211_send_delba(sdata, sta->sta.addr, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); } void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct tid_ampdu_tx *tid_tx; trace_api_stop_tx_ba_cb(sdata, ra, tid); rcu_read_lock(); tid_tx = ieee80211_lookup_tid_tx(sdata, ra, tid, &sta); if (!tid_tx) goto out; set_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state); wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work); out: rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe); void ieee80211_process_addba_resp(struct ieee80211_local *local, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len) { struct tid_ampdu_tx *tid_tx; struct ieee80211_txq *txq; u16 capab, tid, buf_size; bool amsdu; lockdep_assert_wiphy(sta->local->hw.wiphy); capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK; tid = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_TID_MASK); buf_size = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK); ieee80211_retrieve_addba_ext_data(sta, mgmt->u.action.u.addba_resp.variable, len - offsetof(typeof(*mgmt), u.action.u.addba_resp.variable), &buf_size); buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes); txq = sta->sta.txq[tid]; if (!amsdu && txq) set_bit(IEEE80211_TXQ_NO_AMSDU, &to_txq_info(txq)->flags); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx) return; if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { ht_dbg(sta->sdata, "wrong addBA response token, %pM tid %d\n", sta->sta.addr, tid); return; } timer_delete_sync(&tid_tx->addba_resp_timer); ht_dbg(sta->sdata, "switched off addBA timer for %pM tid %d\n", sta->sta.addr, tid); /* * addba_resp_timer may have fired before we got here, and * caused WANT_STOP to be set. If the stop then was already * processed further, STOPPING might be set. */ if (test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state) || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { ht_dbg(sta->sdata, "got addBA resp for %pM tid %d but we already gave up\n", sta->sta.addr, tid); return; } /* * IEEE 802.11-2007 7.3.1.14: * In an ADDBA Response frame, when the Status Code field * is set to 0, the Buffer Size subfield is set to a value * of at least 1. */ if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) == WLAN_STATUS_SUCCESS && buf_size) { if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { /* ignore duplicate response */ return; } tid_tx->buf_size = buf_size; tid_tx->amsdu = amsdu; if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); sta->ampdu_mlme.addba_req_num[tid] = 0; tid_tx->timeout = le16_to_cpu(mgmt->u.action.u.addba_resp.timeout); if (tid_tx->timeout) { mod_timer(&tid_tx->session_timer, TU_TO_EXP_TIME(tid_tx->timeout)); tid_tx->last_tx = jiffies; } } else { __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_DECLINED); } }
7 1 12 7 1 12 28 1 1 2 1 2 1 1 1 1 1 1 1 5 8 1 17 2 2 2 3 20 5 7 15 16 18 18 30 21 7 28 26 2 20 20 20 16 8 1 1 1 2 1 1 4 4 4 2 1 1 18 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 // SPDX-License-Identifier: GPL-2.0+ /* * USB FTDI SIO driver * * Copyright (C) 2009 - 2013 * Johan Hovold (jhovold@gmail.com) * Copyright (C) 1999 - 2001 * Greg Kroah-Hartman (greg@kroah.com) * Bill Ryder (bryder@sgi.com) * Copyright (C) 2002 * Kuba Ober (kuba@mareimbrium.org) * * See Documentation/usb/usb-serial.rst for more information on using this * driver * * See http://ftdi-usb-sio.sourceforge.net for up to date testing info * and extra documentation * * Change entries from 2004 and earlier can be found in versions of this * file in kernel versions prior to the 2.6.24 release. * */ /* Bill Ryder - bryder@sgi.com - wrote the FTDI_SIO implementation */ /* Thanx to FTDI for so kindly providing details of the protocol required */ /* to talk to the device */ /* Thanx to gkh and the rest of the usb dev group for all code I have assimilated :-) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/serial.h> #include <linux/gpio/driver.h> #include <linux/usb/serial.h> #include "ftdi_sio.h" #include "ftdi_sio_ids.h" #define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>, Bill Ryder <bryder@sgi.com>, Kuba Ober <kuba@mareimbrium.org>, Andreas Mohr, Johan Hovold <jhovold@gmail.com>" #define DRIVER_DESC "USB FTDI Serial Converters Driver" enum ftdi_chip_type { SIO, FT232A, FT232B, FT2232C, FT232R, FT232H, FT2232H, FT4232H, FT4232HA, FT232HP, FT233HP, FT2232HP, FT2233HP, FT4232HP, FT4233HP, FTX, }; struct ftdi_private { enum ftdi_chip_type chip_type; int baud_base; /* baud base clock for divisor setting */ int custom_divisor; /* custom_divisor kludge, this is for baud_base (different from what goes to the chip!) */ u16 last_set_data_value; /* the last data state set - needed for doing * a break */ int flags; /* some ASYNC_xxxx flags are supported */ unsigned long last_dtr_rts; /* saved modem control outputs */ char prev_status; /* Used for TIOCMIWAIT */ char transmit_empty; /* If transmitter is empty or not */ u16 channel; /* channel index, or 0 for legacy types */ speed_t force_baud; /* if non-zero, force the baud rate to this value */ int force_rtscts; /* if non-zero, force RTS-CTS to always be enabled */ unsigned int latency; /* latency setting in use */ unsigned short max_packet_size; struct mutex cfg_lock; /* Avoid mess by parallel calls of config ioctl() and change_speed() */ #ifdef CONFIG_GPIOLIB struct gpio_chip gc; struct mutex gpio_lock; /* protects GPIO state */ bool gpio_registered; /* is the gpiochip in kernel registered */ bool gpio_used; /* true if the user requested a gpio */ u8 gpio_altfunc; /* which pins are in gpio mode */ u8 gpio_output; /* pin directions cache */ u8 gpio_value; /* pin value for outputs */ #endif }; struct ftdi_quirk { int (*probe)(struct usb_serial *); /* Special settings for probed ports. */ void (*port_probe)(struct ftdi_private *); }; static int ftdi_jtag_probe(struct usb_serial *serial); static int ftdi_NDI_device_setup(struct usb_serial *serial); static int ftdi_stmclite_probe(struct usb_serial *serial); static int ftdi_8u2232c_probe(struct usb_serial *serial); static void ftdi_USB_UIRT_setup(struct ftdi_private *priv); static void ftdi_HE_TIRA1_setup(struct ftdi_private *priv); static const struct ftdi_quirk ftdi_jtag_quirk = { .probe = ftdi_jtag_probe, }; static const struct ftdi_quirk ftdi_NDI_device_quirk = { .probe = ftdi_NDI_device_setup, }; static const struct ftdi_quirk ftdi_USB_UIRT_quirk = { .port_probe = ftdi_USB_UIRT_setup, }; static const struct ftdi_quirk ftdi_HE_TIRA1_quirk = { .port_probe = ftdi_HE_TIRA1_setup, }; static const struct ftdi_quirk ftdi_stmclite_quirk = { .probe = ftdi_stmclite_probe, }; static const struct ftdi_quirk ftdi_8u2232c_quirk = { .probe = ftdi_8u2232c_probe, }; /* * The 8U232AM has the same API as the sio except for: * - it can support MUCH higher baudrates; up to: * o 921600 for RS232 and 2000000 for RS422/485 at 48MHz * o 230400 at 12MHz * so .. 8U232AM's baudrate setting codes are different * - it has a two byte status code. * - it returns characters every 16ms (the FTDI does it every 40ms) * * the bcdDevice value is used to differentiate FT232BM and FT245BM from * the earlier FT8U232AM and FT8U232BM. For now, include all known VID/PID * combinations in both tables. * FIXME: perhaps bcdDevice can also identify 12MHz FT8U232AM devices, * but I don't know if those ever went into mass production. [Ian Abbott] */ /* * Device ID not listed? Test it using * /sys/bus/usb-serial/drivers/ftdi_sio/new_id and send a patch or report. */ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_BRICK_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ZEITCONTROL_TAGTRACE_MIFARE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CTI_MINI_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CTI_NANO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_AMC232_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CANUSB_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CANDAPTER_PID) }, { USB_DEVICE(FTDI_VID, FTDI_BM_ATOM_NANO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NXTCAM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_EV3CON_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_3_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_4_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_5_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_6_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_7_PID) }, { USB_DEVICE(FTDI_VID, FTDI_USINT_CAT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_USINT_WKEY_PID) }, { USB_DEVICE(FTDI_VID, FTDI_USINT_RS232_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ACTZWAVE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IRTRANS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IPLUS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IPLUS2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DMX4ALL) }, { USB_DEVICE(FTDI_VID, FTDI_SIO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_8U232AM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_8U232AM_ALT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_232RL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_8U2232C_PID) , .driver_info = (kernel_ulong_t)&ftdi_8u2232c_quirk }, { USB_DEVICE(FTDI_VID, FTDI_4232H_PID) }, { USB_DEVICE(FTDI_VID, FTDI_232H_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FTX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT2233HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT4233HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT2232HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT4232HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT233HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT232HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT4232HA_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MICRO_CHAMELEON_PID) }, { USB_DEVICE(FTDI_VID, FTDI_RELAIS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_SNIFFER_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_THROTTLE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_GATEWAY_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_GBM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_GBM_BOOST_PID) }, { USB_DEVICE(NEWPORT_VID, NEWPORT_AGILIS_PID) }, { USB_DEVICE(NEWPORT_VID, NEWPORT_CONEX_CC_PID) }, { USB_DEVICE(NEWPORT_VID, NEWPORT_CONEX_AGP_PID) }, { USB_DEVICE(INTERBIOMETRICS_VID, INTERBIOMETRICS_IOBOARD_PID) }, { USB_DEVICE(INTERBIOMETRICS_VID, INTERBIOMETRICS_MINI_IOBOARD_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SPROG_II) }, { USB_DEVICE(FTDI_VID, FTDI_TAGSYS_LP101_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TAGSYS_P200X_PID) }, { USB_DEVICE(FTDI_VID, FTDI_LENZ_LIUSB_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_632_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_634_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_547_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_633_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_631_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_635_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_640_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_642_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DSS20_PID) }, { USB_DEVICE(FTDI_VID, FTDI_URBAN_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_URBAN_1_PID) }, { USB_DEVICE(FTDI_NF_RIC_VID, FTDI_NF_RIC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_VNHCPCUSB_D_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_3_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_4_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_5_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_6_PID) }, { USB_DEVICE(FTDI_VID, FTDI_R2000KU_TRUE_RNG) }, { USB_DEVICE(FTDI_VID, FTDI_VARDAAN_PID) }, { USB_DEVICE(FTDI_VID, FTDI_AUTO_M3_OP_COM_V2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0100_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0101_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0102_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0103_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0104_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0105_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0106_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0107_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0108_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0109_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0110_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0111_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0112_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0113_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0114_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0115_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0116_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0117_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0118_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0119_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0120_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0121_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0122_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0123_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0124_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0125_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0126_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0127_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0128_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0129_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0130_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0131_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0132_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0133_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0134_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0135_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0136_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0137_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0138_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0139_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0140_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0141_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0142_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0143_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0144_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0145_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0146_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0147_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0148_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0149_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0150_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0151_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0152_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0153_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0154_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0155_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0156_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0157_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0158_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0159_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0160_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0161_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0162_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0163_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0164_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0165_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0166_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0167_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0168_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0169_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0170_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0171_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0172_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0173_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0174_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0175_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0176_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0177_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0178_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0179_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0180_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0181_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0182_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0183_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0184_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0185_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0186_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0187_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0188_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0189_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0190_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0191_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0192_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0193_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0194_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0195_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0196_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0197_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0198_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0199_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01ED_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_4701_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9300_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9301_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9302_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9303_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9304_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9305_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9306_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9307_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9308_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9309_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9310_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9311_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9312_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9313_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9314_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9315_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9316_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9317_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9318_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9319_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931F_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PERLE_ULTRAPORT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PIEGROUP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TNC_X_PID) }, { USB_DEVICE(FTDI_VID, FTDI_USBX_707_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2101_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2102_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2103_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2104_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2106_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2201_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2201_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2202_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2202_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2203_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2203_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2401_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2401_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2401_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2401_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2402_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2402_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2402_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2402_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2403_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2403_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2403_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2403_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_5_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_6_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_7_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_8_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_5_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_6_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_7_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_8_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_5_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_6_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_7_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_8_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803R_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803R_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803R_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803R_4_PID) }, { USB_DEVICE(IDTECH_VID, IDTECH_IDT1221U_PID) }, { USB_DEVICE(OCT_VID, OCT_US101_PID) }, { USB_DEVICE(OCT_VID, OCT_DK201_PID) }, { USB_DEVICE(FTDI_VID, FTDI_HE_TIRA1_PID), .driver_info = (kernel_ulong_t)&ftdi_HE_TIRA1_quirk }, { USB_DEVICE(FTDI_VID, FTDI_USB_UIRT_PID), .driver_info = (kernel_ulong_t)&ftdi_USB_UIRT_quirk }, { USB_DEVICE(FTDI_VID, PROTEGO_SPECIAL_1) }, { USB_DEVICE(FTDI_VID, PROTEGO_R2X0) }, { USB_DEVICE(FTDI_VID, PROTEGO_SPECIAL_3) }, { USB_DEVICE(FTDI_VID, PROTEGO_SPECIAL_4) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E808_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E809_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80A_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80B_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80C_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80D_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80E_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80F_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E888_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E889_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88A_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88B_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88C_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88D_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88E_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88F_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UO100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UM100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UR100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_ALC8500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PYRAMID_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FHZ1000PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_US485_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PICPRO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PCMCIA_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PK1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_RS232MON_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_APP70_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PEDO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PROD_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TAVIR_STK500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TIAO_UMPA_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLX_PLUS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORION_IO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONMX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2WI_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX3_PID) }, /* * ELV devices: */ { USB_DEVICE(FTDI_ELV_VID, FTDI_ELV_WS300_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_USR_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_MSM1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_KL100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS550_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_EC3000_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS888_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_TWS550_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FEM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_CLI7000_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_PPS7330_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_TFM100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UDF77_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UIO88_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UAD8_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UDA7_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_USI2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_T1100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_PCD200_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_ULA200_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_CSI8_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_EM1000DL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_PCK100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_RFP500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FS20SIG_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UTP8_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS300PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS444PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FHZ1300PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_EM1010PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_HS485_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UMS100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_TFD128_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FM3RX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS777_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PALMSENS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IVIUM_XSTAT_PID) }, { USB_DEVICE(FTDI_VID, LINX_SDMUSBQSS_PID) }, { USB_DEVICE(FTDI_VID, LINX_MASTERDEVEL2_PID) }, { USB_DEVICE(FTDI_VID, LINX_FUTURE_0_PID) }, { USB_DEVICE(FTDI_VID, LINX_FUTURE_1_PID) }, { USB_DEVICE(FTDI_VID, LINX_FUTURE_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSICDU20_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSICDU40_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSMACHX_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSLOAD_N_GO_3_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSICDU64_4_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSPRIME8_5_PID) }, { USB_DEVICE(FTDI_VID, INSIDE_ACCESSO) }, { USB_DEVICE(INTREPID_VID, INTREPID_VALUECAN_PID) }, { USB_DEVICE(INTREPID_VID, INTREPID_NEOVI_PID) }, { USB_DEVICE(FALCOM_VID, FALCOM_TWIST_PID) }, { USB_DEVICE(FALCOM_VID, FALCOM_SAMBA_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SUUNTO_SPORTS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OCEANIC_PID) }, { USB_DEVICE(TTI_VID, TTI_QL355P_PID) }, { USB_DEVICE(FTDI_VID, FTDI_RM_CANVIEW_PID) }, { USB_DEVICE(ACTON_VID, ACTON_SPECTRAPRO_PID) }, { USB_DEVICE(CONTEC_VID, CONTEC_COM1USBH_PID) }, { USB_DEVICE(MITSUBISHI_VID, MITSUBISHI_FXUSB_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USO9ML2_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOPTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USPTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USO9ML2DR_2_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USO9ML2DR_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOPTL4DR2_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOPTL4DR_PID) }, { USB_DEVICE(BANDB_VID, BANDB_485USB9F_2W_PID) }, { USB_DEVICE(BANDB_VID, BANDB_485USB9F_4W_PID) }, { USB_DEVICE(BANDB_VID, BANDB_232USB9M_PID) }, { USB_DEVICE(BANDB_VID, BANDB_485USBTB_2W_PID) }, { USB_DEVICE(BANDB_VID, BANDB_485USBTB_4W_PID) }, { USB_DEVICE(BANDB_VID, BANDB_TTL5USB9M_PID) }, { USB_DEVICE(BANDB_VID, BANDB_TTL3USB9M_PID) }, { USB_DEVICE(BANDB_VID, BANDB_ZZ_PROG1_USB_PID) }, { USB_DEVICE(FTDI_VID, EVER_ECO_PRO_CDS) }, { USB_DEVICE(FTDI_VID, FTDI_4N_GALAXY_DE_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_4N_GALAXY_DE_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_4N_GALAXY_DE_3_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_0_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_1_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_2_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_3_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_4_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_5_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_6_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_7_PID) }, { USB_DEVICE(XSENS_VID, XSENS_AWINDA_DONGLE_PID) }, { USB_DEVICE(XSENS_VID, XSENS_AWINDA_STATION_PID) }, { USB_DEVICE(XSENS_VID, XSENS_CONVERTER_PID) }, { USB_DEVICE(XSENS_VID, XSENS_MTDEVBOARD_PID) }, { USB_DEVICE(XSENS_VID, XSENS_MTIUSBCONVERTER_PID) }, { USB_DEVICE(XSENS_VID, XSENS_MTW_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OMNI1509) }, { USB_DEVICE(MOBILITY_VID, MOBILITY_USB_SERIAL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ACTIVE_ROBOTS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_KW_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_YS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_Y6_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_Y8_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_IC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_DB9_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_RS232_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_Y9_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TERATRONIK_VCP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TERATRONIK_D2XX_PID) }, { USB_DEVICE(EVOLUTION_VID, EVOLUTION_ER1_PID) }, { USB_DEVICE(EVOLUTION_VID, EVO_HYBRID_PID) }, { USB_DEVICE(EVOLUTION_VID, EVO_RCM4_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ARTEMIS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16C_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16HR_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16HRC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16IC_PID) }, { USB_DEVICE(KOBIL_VID, KOBIL_CONV_B1_PID) }, { USB_DEVICE(KOBIL_VID, KOBIL_CONV_KAAN_PID) }, { USB_DEVICE(POSIFLEX_VID, POSIFLEX_PP7000_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TTUSB_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ECLO_COM_1WIRE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_WESTREX_MODEL_777_PID) }, { USB_DEVICE(FTDI_VID, FTDI_WESTREX_MODEL_8900F_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PCDJ_DAC2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_RRCIRKITS_LOCOBUFFER_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ASK_RDR400_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NZR_SEM_USB_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_1_PID) }, { USB_DEVICE(ICOM_VID, ICOM_OPC_U_UC_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2C1_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2C2_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2D_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2VT_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2VR_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP4KVT_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP4KVR_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2KVT_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2KVR_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ACG_HFDUAL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_YEI_SERVOCENTER31_PID) }, { USB_DEVICE(FTDI_VID, FTDI_THORLABS_PID) }, { USB_DEVICE(TESTO_VID, TESTO_1_PID) }, { USB_DEVICE(TESTO_VID, TESTO_3_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GAMMA_SCOUT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TACTRIX_OPENPORT_13M_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TACTRIX_OPENPORT_13S_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TACTRIX_OPENPORT_13U_PID) }, { USB_DEVICE(ELEKTOR_VID, ELEKTOR_FT323R_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NDI_HUC_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NDI_SPECTRA_SCU_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NDI_FUTURE_2_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NDI_FUTURE_3_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NDI_AURORA_SCU_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_NDI_VID, FTDI_NDI_EMGUIDE_GEMINI_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(TELLDUS_VID, TELLDUS_TELLSTICK_PID) }, { USB_DEVICE(NOVITUS_VID, NOVITUS_BONO_E_PID) }, { USB_DEVICE(FTDI_VID, RTSYSTEMS_USB_VX8_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S03_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_59_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_57A_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_57B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29A_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29F_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_62B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S01_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_63_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29C_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_81B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_82B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K5D_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K4Y_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K5G_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S05_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_60_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_61_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_62_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_63B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_64_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_65_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_92_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_92D_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_W5R_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_A5R_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_PW1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MAXSTREAM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PHI_FISCO_PID) }, { USB_DEVICE(TML_VID, TML_USB_SERIAL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELSTER_UNICOM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PROPOX_JTAGCABLEII_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PROPOX_ISPCABLEIII_PID) }, { USB_DEVICE(FTDI_VID, CYBER_CORTEX_AV_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_H_PID, 1) }, { USB_DEVICE(FIC_VID, FIC_NEO1973_DEBUG_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_OOCDLINK_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, LMI_LM3S_DEVEL_BOARD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, LMI_LM3S_EVAL_BOARD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, LMI_LM3S_ICDI_BOARD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_TURTELIZER_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) }, { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_SCU18) }, { USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) }, /* Papouch devices based on FTDI chip */ { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB485_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_AP485_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB422_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB485_2_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_AP485_2_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB422_2_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB485S_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB485C_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_LEC_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB232_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_TMU_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_IRAMP_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_DRAK5_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO8x8_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO4x4_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO2x2_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO10x1_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO30x3_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO60x3_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO2x16_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO3x32_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_DRAK6_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_UPSUSB_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_MU_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SIMUKEY_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_AD4USB_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_GMUX_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_GMSR_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DOMINTELL_DGQG_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DOMINTELL_DUSB_PID) }, { USB_DEVICE(ALTI2_VID, ALTI2_N3_PID) }, { USB_DEVICE(FTDI_VID, DIEBOLD_BCS_SE923_PID) }, { USB_DEVICE(ATMEL_VID, STK541_PID) }, { USB_DEVICE(DE_VID, STB_PID) }, { USB_DEVICE(DE_VID, WHT_PID) }, { USB_DEVICE(ADI_VID, ADI_GNICE_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(ADI_VID, ADI_GNICEPLUS_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE_AND_INTERFACE_INFO(MICROCHIP_VID, MICROCHIP_USB_BOARD_PID, USB_CLASS_VENDOR_SPEC, USB_SUBCLASS_VENDOR_SPEC, 0x00) }, { USB_DEVICE_INTERFACE_NUMBER(ACTEL_VID, MICROSEMI_ARROW_SF2PLUS_BOARD_PID, 2) }, { USB_DEVICE(JETI_VID, JETI_SPC1201_PID) }, { USB_DEVICE(MARVELL_VID, MARVELL_SHEEVAPLUG_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(LARSENBRUSGAARD_VID, LB_ALTITRACK_PID) }, { USB_DEVICE(GN_OTOMETRICS_VID, AURICAL_USB_PID) }, { USB_DEVICE(FTDI_VID, PI_C865_PID) }, { USB_DEVICE(FTDI_VID, PI_C857_PID) }, { USB_DEVICE(PI_VID, PI_C866_PID) }, { USB_DEVICE(PI_VID, PI_C663_PID) }, { USB_DEVICE(PI_VID, PI_C725_PID) }, { USB_DEVICE(PI_VID, PI_E517_PID) }, { USB_DEVICE(PI_VID, PI_C863_PID) }, { USB_DEVICE(PI_VID, PI_E861_PID) }, { USB_DEVICE(PI_VID, PI_C867_PID) }, { USB_DEVICE(PI_VID, PI_E609_PID) }, { USB_DEVICE(PI_VID, PI_E709_PID) }, { USB_DEVICE(PI_VID, PI_100F_PID) }, { USB_DEVICE(PI_VID, PI_1011_PID) }, { USB_DEVICE(PI_VID, PI_1012_PID) }, { USB_DEVICE(PI_VID, PI_1013_PID) }, { USB_DEVICE(PI_VID, PI_1014_PID) }, { USB_DEVICE(PI_VID, PI_1015_PID) }, { USB_DEVICE(PI_VID, PI_1016_PID) }, { USB_DEVICE(KONDO_VID, KONDO_USB_SERIAL_PID) }, { USB_DEVICE(BAYER_VID, BAYER_CONTOUR_CABLE_PID) }, { USB_DEVICE(FTDI_VID, MARVELL_OPENRD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, TI_XDS100V2_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, HAMEG_HO820_PID) }, { USB_DEVICE(FTDI_VID, HAMEG_HO720_PID) }, { USB_DEVICE(FTDI_VID, HAMEG_HO730_PID) }, { USB_DEVICE(FTDI_VID, HAMEG_HO870_PID) }, { USB_DEVICE(FTDI_VID, MJSG_GENERIC_PID) }, { USB_DEVICE(FTDI_VID, MJSG_SR_RADIO_PID) }, { USB_DEVICE(FTDI_VID, MJSG_HD_RADIO_PID) }, { USB_DEVICE(FTDI_VID, MJSG_XM_RADIO_PID) }, { USB_DEVICE(FTDI_VID, XVERVE_SIGNALYZER_ST_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, XVERVE_SIGNALYZER_SLITE_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, XVERVE_SIGNALYZER_SH2_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, XVERVE_SIGNALYZER_SH4_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, SEGWAY_RMP200_PID) }, { USB_DEVICE(FTDI_VID, ACCESIO_COM4SM_PID) }, { USB_DEVICE(IONICS_VID, IONICS_PLUGCOMPUTER_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_24_MASTER_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_PC_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_USB_DMX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MIDI_TIMECODE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MINI_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MAXI_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MEDIA_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_LOGBOOKML_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_LS_LOGBOOK_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_HS_LOGBOOK_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CINTERION_MC55I_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FHE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DOTEC_PID) }, { USB_DEVICE(QIHARDWARE_VID, MILKYMISTONE_JTAGSERIAL_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(ST_VID, ST_STMCLT_2232_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(ST_VID, ST_STMCLT_4232_PID), .driver_info = (kernel_ulong_t)&ftdi_stmclite_quirk }, { USB_DEVICE(FTDI_VID, FTDI_RF_R106) }, { USB_DEVICE(FTDI_VID, FTDI_DISTORTEC_JTAG_LOCK_PICK_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_LUMEL_PD12_PID) }, /* Crucible Devices */ { USB_DEVICE(FTDI_VID, FTDI_CT_COMET_PID) }, { USB_DEVICE(FTDI_VID, FTDI_Z3X_PID) }, /* Cressi Devices */ { USB_DEVICE(FTDI_VID, FTDI_CRESSI_PID) }, /* Brainboxes Devices */ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_001_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_012_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_023_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_034_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_101_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_159_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_4_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_5_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_6_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_7_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_8_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_235_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_257_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_4_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_313_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_320_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_324_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_357_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_606_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_606_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_606_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_701_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_701_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_4_PID) }, /* ekey Devices */ { USB_DEVICE(FTDI_VID, FTDI_EKEY_CONV_USB_PID) }, /* Infineon Devices */ { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_TC1798_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_TC2X7_PID, 1) }, /* GE Healthcare devices */ { USB_DEVICE(GE_HEALTHCARE_VID, GE_HEALTHCARE_NEMO_TRACKER_PID) }, /* Active Research (Actisense) devices */ { USB_DEVICE(FTDI_VID, ACTISENSE_NDC_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_USG_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_NGT_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_NGW_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_UID_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_USA_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_NGX_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_D9AF_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEAGAUGE_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASWITCH_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_NMEA2000_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ETHERNET_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_WIFI_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) }, /* Belimo Automation devices */ { USB_DEVICE(FTDI_VID, BELIMO_ZTH_PID) }, { USB_DEVICE(FTDI_VID, BELIMO_ZIP_PID) }, /* ICP DAS I-756xU devices */ { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) }, { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) }, { USB_DEVICE(TI_VID, TI_CC3200_LAUNCHPAD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) }, /* EZPrototypes devices */ { USB_DEVICE(EZPROTOTYPES_VID, HJELMSLUND_USB485_ISO_PID) }, { USB_DEVICE_INTERFACE_NUMBER(UNJO_VID, UNJO_ISODEBUG_V1_PID, 1) }, /* Sienna devices */ { USB_DEVICE(FTDI_VID, FTDI_SIENNA_PID) }, { USB_DEVICE(ECHELON_VID, ECHELON_U20_PID) }, /* IDS GmbH devices */ { USB_DEVICE(IDS_VID, IDS_SI31A_PID) }, { USB_DEVICE(IDS_VID, IDS_CM31A_PID) }, /* Omron devices */ { USB_DEVICE(OMRON_VID, OMRON_CS1W_CIF31_PID) }, /* U-Blox devices */ { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ZED_PID) }, { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ODIN_PID) }, /* FreeCalypso USB adapters */ { USB_DEVICE(FTDI_VID, FTDI_FALCONIA_JTAG_BUF_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_FALCONIA_JTAG_UNBUF_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, /* GMC devices */ { USB_DEVICE(GMC_VID, GMC_Z216C_PID) }, /* Altera USB Blaster 3 */ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6022_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6025_PID, 2) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6026_PID, 2) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6026_PID, 3) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6029_PID, 2) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602A_PID, 2) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602A_PID, 3) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602C_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602D_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602D_PID, 2) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 2) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 3) }, /* Abacus Electrics */ { USB_DEVICE(FTDI_VID, ABACUS_OPTICAL_PROBE_PID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table_combined); static const char *ftdi_chip_name[] = { [SIO] = "SIO", /* the serial part of FT8U100AX */ [FT232A] = "FT232A", [FT232B] = "FT232B", [FT2232C] = "FT2232C/D", [FT232R] = "FT232R", [FT232H] = "FT232H", [FT2232H] = "FT2232H", [FT4232H] = "FT4232H", [FT4232HA] = "FT4232HA", [FT232HP] = "FT232HP", [FT233HP] = "FT233HP", [FT2232HP] = "FT2232HP", [FT2233HP] = "FT2233HP", [FT4232HP] = "FT4232HP", [FT4233HP] = "FT4233HP", [FTX] = "FT-X", }; /* Used for TIOCMIWAIT */ #define FTDI_STATUS_B0_MASK (FTDI_RS0_CTS | FTDI_RS0_DSR | FTDI_RS0_RI | FTDI_RS0_RLSD) #define FTDI_STATUS_B1_MASK (FTDI_RS_BI) /* End TIOCMIWAIT */ static void ftdi_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios); static int ftdi_get_modem_status(struct usb_serial_port *port, unsigned char status[2]); #define WDR_TIMEOUT 5000 /* default urb timeout */ #define WDR_SHORT_TIMEOUT 1000 /* shorter urb timeout */ /* * *************************************************************************** * Utility functions * *************************************************************************** */ static unsigned short int ftdi_232am_baud_base_to_divisor(int baud, int base) { unsigned short int divisor; /* divisor shifted 3 bits to the left */ int divisor3 = DIV_ROUND_CLOSEST(base, 2 * baud); if ((divisor3 & 0x7) == 7) divisor3++; /* round x.7/8 up to x+1 */ divisor = divisor3 >> 3; divisor3 &= 0x7; if (divisor3 == 1) divisor |= 0xc000; /* +0.125 */ else if (divisor3 >= 4) divisor |= 0x4000; /* +0.5 */ else if (divisor3 != 0) divisor |= 0x8000; /* +0.25 */ else if (divisor == 1) divisor = 0; /* special case for maximum baud rate */ return divisor; } static unsigned short int ftdi_232am_baud_to_divisor(int baud) { return ftdi_232am_baud_base_to_divisor(baud, 48000000); } static u32 ftdi_232bm_baud_base_to_divisor(int baud, int base) { static const unsigned char divfrac[8] = { 0, 3, 2, 4, 1, 5, 6, 7 }; u32 divisor; /* divisor shifted 3 bits to the left */ int divisor3 = DIV_ROUND_CLOSEST(base, 2 * baud); divisor = divisor3 >> 3; divisor |= (u32)divfrac[divisor3 & 0x7] << 14; /* Deal with special cases for highest baud rates. */ if (divisor == 1) /* 1.0 */ divisor = 0; else if (divisor == 0x4001) /* 1.5 */ divisor = 1; return divisor; } static u32 ftdi_232bm_baud_to_divisor(int baud) { return ftdi_232bm_baud_base_to_divisor(baud, 48000000); } static u32 ftdi_2232h_baud_base_to_divisor(int baud, int base) { static const unsigned char divfrac[8] = { 0, 3, 2, 4, 1, 5, 6, 7 }; u32 divisor; int divisor3; /* hi-speed baud rate is 10-bit sampling instead of 16-bit */ divisor3 = DIV_ROUND_CLOSEST(8 * base, 10 * baud); divisor = divisor3 >> 3; divisor |= (u32)divfrac[divisor3 & 0x7] << 14; /* Deal with special cases for highest baud rates. */ if (divisor == 1) /* 1.0 */ divisor = 0; else if (divisor == 0x4001) /* 1.5 */ divisor = 1; /* * Set this bit to turn off a divide by 2.5 on baud rate generator * This enables baud rates up to 12Mbaud but cannot reach below 1200 * baud with this bit set */ divisor |= 0x00020000; return divisor; } static u32 ftdi_2232h_baud_to_divisor(int baud) { return ftdi_2232h_baud_base_to_divisor(baud, 120000000); } #define set_mctrl(port, set) update_mctrl((port), (set), 0) #define clear_mctrl(port, clear) update_mctrl((port), 0, (clear)) static int update_mctrl(struct usb_serial_port *port, unsigned int set, unsigned int clear) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct device *dev = &port->dev; unsigned value; int rv; if (((set | clear) & (TIOCM_DTR | TIOCM_RTS)) == 0) { dev_dbg(dev, "%s - DTR|RTS not being set|cleared\n", __func__); return 0; /* no change */ } clear &= ~set; /* 'set' takes precedence over 'clear' */ value = 0; if (clear & TIOCM_DTR) value |= FTDI_SIO_SET_DTR_LOW; if (clear & TIOCM_RTS) value |= FTDI_SIO_SET_RTS_LOW; if (set & TIOCM_DTR) value |= FTDI_SIO_SET_DTR_HIGH; if (set & TIOCM_RTS) value |= FTDI_SIO_SET_RTS_HIGH; rv = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), FTDI_SIO_SET_MODEM_CTRL_REQUEST, FTDI_SIO_SET_MODEM_CTRL_REQUEST_TYPE, value, priv->channel, NULL, 0, WDR_TIMEOUT); if (rv < 0) { dev_dbg(dev, "%s Error from MODEM_CTRL urb: DTR %s, RTS %s\n", __func__, (set & TIOCM_DTR) ? "HIGH" : (clear & TIOCM_DTR) ? "LOW" : "unchanged", (set & TIOCM_RTS) ? "HIGH" : (clear & TIOCM_RTS) ? "LOW" : "unchanged"); rv = usb_translate_errors(rv); } else { dev_dbg(dev, "%s - DTR %s, RTS %s\n", __func__, (set & TIOCM_DTR) ? "HIGH" : (clear & TIOCM_DTR) ? "LOW" : "unchanged", (set & TIOCM_RTS) ? "HIGH" : (clear & TIOCM_RTS) ? "LOW" : "unchanged"); /* FIXME: locking on last_dtr_rts */ priv->last_dtr_rts = (priv->last_dtr_rts & ~clear) | set; } return rv; } static u32 get_ftdi_divisor(struct tty_struct *tty, struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct device *dev = &port->dev; u32 div_value = 0; int div_okay = 1; int baud; baud = tty_get_baud_rate(tty); dev_dbg(dev, "%s - tty_get_baud_rate reports speed %d\n", __func__, baud); /* * Observe deprecated async-compatible custom_divisor hack, update * baudrate if needed. */ if (baud == 38400 && ((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_CUST) && (priv->custom_divisor)) { baud = priv->baud_base / priv->custom_divisor; dev_dbg(dev, "%s - custom divisor %d sets baud rate to %d\n", __func__, priv->custom_divisor, baud); } if (!baud) baud = 9600; switch (priv->chip_type) { case SIO: switch (baud) { case 300: div_value = ftdi_sio_b300; break; case 600: div_value = ftdi_sio_b600; break; case 1200: div_value = ftdi_sio_b1200; break; case 2400: div_value = ftdi_sio_b2400; break; case 4800: div_value = ftdi_sio_b4800; break; case 9600: div_value = ftdi_sio_b9600; break; case 19200: div_value = ftdi_sio_b19200; break; case 38400: div_value = ftdi_sio_b38400; break; case 57600: div_value = ftdi_sio_b57600; break; case 115200: div_value = ftdi_sio_b115200; break; default: dev_dbg(dev, "%s - Baudrate (%d) requested is not supported\n", __func__, baud); div_value = ftdi_sio_b9600; baud = 9600; div_okay = 0; } break; case FT232A: if (baud <= 3000000) { div_value = ftdi_232am_baud_to_divisor(baud); } else { dev_dbg(dev, "%s - Baud rate too high!\n", __func__); baud = 9600; div_value = ftdi_232am_baud_to_divisor(9600); div_okay = 0; } break; case FT232B: case FT2232C: case FT232R: case FTX: if (baud <= 3000000) { u16 product_id = le16_to_cpu( port->serial->dev->descriptor.idProduct); if (((product_id == FTDI_NDI_HUC_PID) || (product_id == FTDI_NDI_SPECTRA_SCU_PID) || (product_id == FTDI_NDI_FUTURE_2_PID) || (product_id == FTDI_NDI_FUTURE_3_PID) || (product_id == FTDI_NDI_AURORA_SCU_PID)) && (baud == 19200)) { baud = 1200000; } div_value = ftdi_232bm_baud_to_divisor(baud); } else { dev_dbg(dev, "%s - Baud rate too high!\n", __func__); div_value = ftdi_232bm_baud_to_divisor(9600); div_okay = 0; baud = 9600; } break; default: if ((baud <= 12000000) && (baud >= 1200)) { div_value = ftdi_2232h_baud_to_divisor(baud); } else if (baud < 1200) { div_value = ftdi_232bm_baud_to_divisor(baud); } else { dev_dbg(dev, "%s - Baud rate too high!\n", __func__); div_value = ftdi_232bm_baud_to_divisor(9600); div_okay = 0; baud = 9600; } break; } if (div_okay) { dev_dbg(dev, "%s - Baud rate set to %d (divisor 0x%lX) on chip %s\n", __func__, baud, (unsigned long)div_value, ftdi_chip_name[priv->chip_type]); } tty_encode_baud_rate(tty, baud, baud); return div_value; } static int change_speed(struct tty_struct *tty, struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); u16 value; u16 index; u32 index_value; int rv; index_value = get_ftdi_divisor(tty, port); value = (u16)index_value; index = (u16)(index_value >> 16); if (priv->channel) index = (u16)((index << 8) | priv->channel); rv = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), FTDI_SIO_SET_BAUDRATE_REQUEST, FTDI_SIO_SET_BAUDRATE_REQUEST_TYPE, value, index, NULL, 0, WDR_SHORT_TIMEOUT); return rv; } static int write_latency_timer(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_device *udev = port->serial->dev; int rv; int l = priv->latency; if (priv->chip_type == SIO || priv->chip_type == FT232A) return -EINVAL; if (priv->flags & ASYNC_LOW_LATENCY) l = 1; dev_dbg(&port->dev, "%s: setting latency timer = %i\n", __func__, l); rv = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), FTDI_SIO_SET_LATENCY_TIMER_REQUEST, FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE, l, priv->channel, NULL, 0, WDR_TIMEOUT); if (rv < 0) dev_err(&port->dev, "Unable to write latency timer: %i\n", rv); return rv; } static int _read_latency_timer(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_device *udev = port->serial->dev; u8 buf; int rv; rv = usb_control_msg_recv(udev, 0, FTDI_SIO_GET_LATENCY_TIMER_REQUEST, FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE, 0, priv->channel, &buf, 1, WDR_TIMEOUT, GFP_KERNEL); if (rv == 0) rv = buf; return rv; } static int read_latency_timer(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); int rv; if (priv->chip_type == SIO || priv->chip_type == FT232A) return -EINVAL; rv = _read_latency_timer(port); if (rv < 0) { dev_err(&port->dev, "Unable to read latency timer: %i\n", rv); return rv; } priv->latency = rv; return 0; } static void get_serial_info(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct ftdi_private *priv = usb_get_serial_port_data(port); mutex_lock(&priv->cfg_lock); ss->flags = priv->flags; ss->baud_base = priv->baud_base; ss->custom_divisor = priv->custom_divisor; mutex_unlock(&priv->cfg_lock); } static int set_serial_info(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct ftdi_private *priv = usb_get_serial_port_data(port); int old_flags, old_divisor; mutex_lock(&priv->cfg_lock); if (!capable(CAP_SYS_ADMIN)) { if ((ss->flags ^ priv->flags) & ~ASYNC_USR_MASK) { mutex_unlock(&priv->cfg_lock); return -EPERM; } } old_flags = priv->flags; old_divisor = priv->custom_divisor; priv->flags = ss->flags & ASYNC_FLAGS; priv->custom_divisor = ss->custom_divisor; write_latency_timer(port); if ((priv->flags ^ old_flags) & ASYNC_SPD_MASK || ((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_CUST && priv->custom_divisor != old_divisor)) { /* warn about deprecation unless clearing */ if (priv->flags & ASYNC_SPD_MASK) dev_warn_ratelimited(&port->dev, "use of SPD flags is deprecated\n"); change_speed(tty, port); } mutex_unlock(&priv->cfg_lock); return 0; } static int get_lsr_info(struct usb_serial_port *port, unsigned int __user *retinfo) { struct ftdi_private *priv = usb_get_serial_port_data(port); unsigned int result = 0; if (priv->transmit_empty) result = TIOCSER_TEMT; if (copy_to_user(retinfo, &result, sizeof(unsigned int))) return -EFAULT; return 0; } static int ftdi_determine_type(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; struct usb_device *udev = serial->dev; unsigned int version, ifnum; version = le16_to_cpu(udev->descriptor.bcdDevice); ifnum = serial->interface->cur_altsetting->desc.bInterfaceNumber; /* Assume Hi-Speed type */ priv->baud_base = 120000000 / 2; priv->channel = CHANNEL_A + ifnum; switch (version) { case 0x200: priv->chip_type = FT232A; priv->baud_base = 48000000 / 2; priv->channel = 0; /* * FT232B devices have a bug where bcdDevice gets set to 0x200 * when iSerialNumber is 0. Assume it is an FT232B in case the * latency timer is readable. */ if (udev->descriptor.iSerialNumber == 0 && _read_latency_timer(port) >= 0) { priv->chip_type = FT232B; } break; case 0x400: priv->chip_type = FT232B; priv->baud_base = 48000000 / 2; priv->channel = 0; break; case 0x500: priv->chip_type = FT2232C; priv->baud_base = 48000000 / 2; break; case 0x600: priv->chip_type = FT232R; priv->baud_base = 48000000 / 2; priv->channel = 0; break; case 0x700: priv->chip_type = FT2232H; break; case 0x800: priv->chip_type = FT4232H; break; case 0x900: priv->chip_type = FT232H; break; case 0x1000: priv->chip_type = FTX; priv->baud_base = 48000000 / 2; break; case 0x2800: priv->chip_type = FT2233HP; break; case 0x2900: priv->chip_type = FT4233HP; break; case 0x3000: priv->chip_type = FT2232HP; break; case 0x3100: priv->chip_type = FT4232HP; break; case 0x3200: priv->chip_type = FT233HP; break; case 0x3300: priv->chip_type = FT232HP; break; case 0x3600: priv->chip_type = FT4232HA; break; default: if (version < 0x200) { priv->chip_type = SIO; priv->baud_base = 12000000 / 16; priv->channel = 0; } else { dev_err(&port->dev, "unknown device type: 0x%02x\n", version); return -ENODEV; } } dev_info(&udev->dev, "Detected %s\n", ftdi_chip_name[priv->chip_type]); return 0; } /* * Determine the maximum packet size for the device. This depends on the chip * type and the USB host capabilities. The value should be obtained from the * device descriptor as the chip will use the appropriate values for the host. */ static void ftdi_set_max_packet_size(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_interface *interface = port->serial->interface; struct usb_endpoint_descriptor *ep_desc; unsigned num_endpoints; unsigned i; num_endpoints = interface->cur_altsetting->desc.bNumEndpoints; if (!num_endpoints) return; /* * NOTE: Some customers have programmed FT232R/FT245R devices * with an endpoint size of 0 - not good. In this case, we * want to override the endpoint descriptor setting and use a * value of 64 for wMaxPacketSize. */ for (i = 0; i < num_endpoints; i++) { ep_desc = &interface->cur_altsetting->endpoint[i].desc; if (!ep_desc->wMaxPacketSize) { ep_desc->wMaxPacketSize = cpu_to_le16(0x40); dev_warn(&port->dev, "Overriding wMaxPacketSize on endpoint %d\n", usb_endpoint_num(ep_desc)); } } /* Set max packet size based on last descriptor. */ priv->max_packet_size = usb_endpoint_maxp(ep_desc); } /* * *************************************************************************** * Sysfs Attribute * *************************************************************************** */ static ssize_t latency_timer_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); if (priv->flags & ASYNC_LOW_LATENCY) return sprintf(buf, "1\n"); else return sprintf(buf, "%u\n", priv->latency); } /* Write a new value of the latency timer, in units of milliseconds. */ static ssize_t latency_timer_store(struct device *dev, struct device_attribute *attr, const char *valbuf, size_t count) { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); u8 v; int rv; if (kstrtou8(valbuf, 10, &v)) return -EINVAL; priv->latency = v; rv = write_latency_timer(port); if (rv < 0) return -EIO; return count; } static DEVICE_ATTR_RW(latency_timer); /* Write an event character directly to the FTDI register. The ASCII value is in the low 8 bits, with the enable bit in the 9th bit. */ static ssize_t event_char_store(struct device *dev, struct device_attribute *attr, const char *valbuf, size_t count) { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_device *udev = port->serial->dev; unsigned int v; int rv; if (kstrtouint(valbuf, 0, &v) || v >= 0x200) return -EINVAL; dev_dbg(&port->dev, "%s: setting event char = 0x%03x\n", __func__, v); rv = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), FTDI_SIO_SET_EVENT_CHAR_REQUEST, FTDI_SIO_SET_EVENT_CHAR_REQUEST_TYPE, v, priv->channel, NULL, 0, WDR_TIMEOUT); if (rv < 0) { dev_dbg(&port->dev, "Unable to write event character: %i\n", rv); return -EIO; } return count; } static DEVICE_ATTR_WO(event_char); static struct attribute *ftdi_attrs[] = { &dev_attr_event_char.attr, &dev_attr_latency_timer.attr, NULL }; static umode_t ftdi_is_visible(struct kobject *kobj, struct attribute *attr, int idx) { struct device *dev = kobj_to_dev(kobj); struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); enum ftdi_chip_type type = priv->chip_type; if (attr == &dev_attr_event_char.attr) { if (type == SIO) return 0; } if (attr == &dev_attr_latency_timer.attr) { if (type == SIO || type == FT232A) return 0; } return attr->mode; } static const struct attribute_group ftdi_group = { .attrs = ftdi_attrs, .is_visible = ftdi_is_visible, }; static const struct attribute_group *ftdi_groups[] = { &ftdi_group, NULL }; #ifdef CONFIG_GPIOLIB static int ftdi_set_bitmode(struct usb_serial_port *port, u8 mode) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; int result; u16 val; result = usb_autopm_get_interface(serial->interface); if (result) return result; val = (mode << 8) | (priv->gpio_output << 4) | priv->gpio_value; result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), FTDI_SIO_SET_BITMODE_REQUEST, FTDI_SIO_SET_BITMODE_REQUEST_TYPE, val, priv->channel, NULL, 0, WDR_TIMEOUT); if (result < 0) { dev_err(&serial->interface->dev, "bitmode request failed for value 0x%04x: %d\n", val, result); } usb_autopm_put_interface(serial->interface); return result; } static int ftdi_set_cbus_pins(struct usb_serial_port *port) { return ftdi_set_bitmode(port, FTDI_SIO_BITMODE_CBUS); } static int ftdi_exit_cbus_mode(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); priv->gpio_output = 0; priv->gpio_value = 0; return ftdi_set_bitmode(port, FTDI_SIO_BITMODE_RESET); } static int ftdi_gpio_request(struct gpio_chip *gc, unsigned int offset) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); int result; mutex_lock(&priv->gpio_lock); if (!priv->gpio_used) { /* Set default pin states, as we cannot get them from device */ priv->gpio_output = 0x00; priv->gpio_value = 0x00; result = ftdi_set_cbus_pins(port); if (result) { mutex_unlock(&priv->gpio_lock); return result; } priv->gpio_used = true; } mutex_unlock(&priv->gpio_lock); return 0; } static int ftdi_read_cbus_pins(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; u8 buf; int result; result = usb_autopm_get_interface(serial->interface); if (result) return result; result = usb_control_msg_recv(serial->dev, 0, FTDI_SIO_READ_PINS_REQUEST, FTDI_SIO_READ_PINS_REQUEST_TYPE, 0, priv->channel, &buf, 1, WDR_TIMEOUT, GFP_KERNEL); if (result == 0) result = buf; usb_autopm_put_interface(serial->interface); return result; } static int ftdi_gpio_get(struct gpio_chip *gc, unsigned int gpio) { struct usb_serial_port *port = gpiochip_get_data(gc); int result; result = ftdi_read_cbus_pins(port); if (result < 0) return result; return !!(result & BIT(gpio)); } static int ftdi_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); int result; mutex_lock(&priv->gpio_lock); if (value) priv->gpio_value |= BIT(gpio); else priv->gpio_value &= ~BIT(gpio); result = ftdi_set_cbus_pins(port); mutex_unlock(&priv->gpio_lock); return result; } static int ftdi_gpio_get_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { struct usb_serial_port *port = gpiochip_get_data(gc); int result; result = ftdi_read_cbus_pins(port); if (result < 0) return result; *bits = result & *mask; return 0; } static int ftdi_gpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); int result; mutex_lock(&priv->gpio_lock); priv->gpio_value &= ~(*mask); priv->gpio_value |= *bits & *mask; result = ftdi_set_cbus_pins(port); mutex_unlock(&priv->gpio_lock); return result; } static int ftdi_gpio_direction_get(struct gpio_chip *gc, unsigned int gpio) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); return !(priv->gpio_output & BIT(gpio)); } static int ftdi_gpio_direction_input(struct gpio_chip *gc, unsigned int gpio) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); int result; mutex_lock(&priv->gpio_lock); priv->gpio_output &= ~BIT(gpio); result = ftdi_set_cbus_pins(port); mutex_unlock(&priv->gpio_lock); return result; } static int ftdi_gpio_direction_output(struct gpio_chip *gc, unsigned int gpio, int value) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); int result; mutex_lock(&priv->gpio_lock); priv->gpio_output |= BIT(gpio); if (value) priv->gpio_value |= BIT(gpio); else priv->gpio_value &= ~BIT(gpio); result = ftdi_set_cbus_pins(port); mutex_unlock(&priv->gpio_lock); return result; } static int ftdi_gpio_init_valid_mask(struct gpio_chip *gc, unsigned long *valid_mask, unsigned int ngpios) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); unsigned long map = priv->gpio_altfunc; bitmap_complement(valid_mask, &map, ngpios); if (bitmap_empty(valid_mask, ngpios)) dev_dbg(&port->dev, "no CBUS pin configured for GPIO\n"); else dev_dbg(&port->dev, "CBUS%*pbl configured for GPIO\n", ngpios, valid_mask); return 0; } static int ftdi_read_eeprom(struct usb_serial *serial, void *dst, u16 addr, u16 nbytes) { int read = 0; if (addr % 2 != 0) return -EINVAL; if (nbytes % 2 != 0) return -EINVAL; /* Read EEPROM two bytes at a time */ while (read < nbytes) { int rv; rv = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), FTDI_SIO_READ_EEPROM_REQUEST, FTDI_SIO_READ_EEPROM_REQUEST_TYPE, 0, (addr + read) / 2, dst + read, 2, WDR_TIMEOUT); if (rv < 2) { if (rv >= 0) return -EIO; else return rv; } read += rv; } return 0; } static int ftdi_gpio_init_ft232h(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); u16 cbus_config; u8 *buf; int ret; int i; buf = kmalloc(4, GFP_KERNEL); if (!buf) return -ENOMEM; ret = ftdi_read_eeprom(port->serial, buf, 0x1a, 4); if (ret < 0) goto out_free; /* * FT232H CBUS Memory Map * * 0x1a: X- (upper nibble -> AC5) * 0x1b: -X (lower nibble -> AC6) * 0x1c: XX (upper nibble -> AC9 | lower nibble -> AC8) */ cbus_config = buf[2] << 8 | (buf[1] & 0xf) << 4 | (buf[0] & 0xf0) >> 4; priv->gc.ngpio = 4; priv->gpio_altfunc = 0xff; for (i = 0; i < priv->gc.ngpio; ++i) { if ((cbus_config & 0xf) == FTDI_FTX_CBUS_MUX_GPIO) priv->gpio_altfunc &= ~BIT(i); cbus_config >>= 4; } out_free: kfree(buf); return ret; } static int ftdi_gpio_init_ft232r(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); u16 cbus_config; u8 *buf; int ret; int i; buf = kmalloc(2, GFP_KERNEL); if (!buf) return -ENOMEM; ret = ftdi_read_eeprom(port->serial, buf, 0x14, 2); if (ret < 0) goto out_free; cbus_config = le16_to_cpup((__le16 *)buf); dev_dbg(&port->dev, "cbus_config = 0x%04x\n", cbus_config); priv->gc.ngpio = 4; priv->gpio_altfunc = 0xff; for (i = 0; i < priv->gc.ngpio; ++i) { if ((cbus_config & 0xf) == FTDI_FT232R_CBUS_MUX_GPIO) priv->gpio_altfunc &= ~BIT(i); cbus_config >>= 4; } out_free: kfree(buf); return ret; } static int ftdi_gpio_init_ftx(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; const u16 cbus_cfg_addr = 0x1a; const u16 cbus_cfg_size = 4; u8 *cbus_cfg_buf; int result; u8 i; cbus_cfg_buf = kmalloc(cbus_cfg_size, GFP_KERNEL); if (!cbus_cfg_buf) return -ENOMEM; result = ftdi_read_eeprom(serial, cbus_cfg_buf, cbus_cfg_addr, cbus_cfg_size); if (result < 0) goto out_free; /* FIXME: FT234XD alone has 1 GPIO, but how to recognize this IC? */ priv->gc.ngpio = 4; /* Determine which pins are configured for CBUS bitbanging */ priv->gpio_altfunc = 0xff; for (i = 0; i < priv->gc.ngpio; ++i) { if (cbus_cfg_buf[i] == FTDI_FTX_CBUS_MUX_GPIO) priv->gpio_altfunc &= ~BIT(i); } out_free: kfree(cbus_cfg_buf); return result; } static int ftdi_gpio_init(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; int result; switch (priv->chip_type) { case FT232H: result = ftdi_gpio_init_ft232h(port); break; case FT232R: result = ftdi_gpio_init_ft232r(port); break; case FTX: result = ftdi_gpio_init_ftx(port); break; default: return 0; } if (result < 0) return result; mutex_init(&priv->gpio_lock); priv->gc.label = "ftdi-cbus"; priv->gc.request = ftdi_gpio_request; priv->gc.get_direction = ftdi_gpio_direction_get; priv->gc.direction_input = ftdi_gpio_direction_input; priv->gc.direction_output = ftdi_gpio_direction_output; priv->gc.init_valid_mask = ftdi_gpio_init_valid_mask; priv->gc.get = ftdi_gpio_get; priv->gc.set = ftdi_gpio_set; priv->gc.get_multiple = ftdi_gpio_get_multiple; priv->gc.set_multiple = ftdi_gpio_set_multiple; priv->gc.owner = THIS_MODULE; priv->gc.parent = &serial->interface->dev; priv->gc.base = -1; priv->gc.can_sleep = true; result = gpiochip_add_data(&priv->gc, port); if (!result) priv->gpio_registered = true; return result; } static void ftdi_gpio_remove(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); if (priv->gpio_registered) { gpiochip_remove(&priv->gc); priv->gpio_registered = false; } if (priv->gpio_used) { /* Exiting CBUS-mode does not reset pin states. */ ftdi_exit_cbus_mode(port); priv->gpio_used = false; } } #else static int ftdi_gpio_init(struct usb_serial_port *port) { return 0; } static void ftdi_gpio_remove(struct usb_serial_port *port) { } #endif /* CONFIG_GPIOLIB */ /* * *************************************************************************** * FTDI driver specific functions * *************************************************************************** */ static int ftdi_probe(struct usb_serial *serial, const struct usb_device_id *id) { const struct ftdi_quirk *quirk = (struct ftdi_quirk *)id->driver_info; if (quirk && quirk->probe) { int ret = quirk->probe(serial); if (ret != 0) return ret; } usb_set_serial_data(serial, (void *)id->driver_info); return 0; } static int ftdi_port_probe(struct usb_serial_port *port) { const struct ftdi_quirk *quirk = usb_get_serial_data(port->serial); struct ftdi_private *priv; int result; priv = kzalloc(sizeof(struct ftdi_private), GFP_KERNEL); if (!priv) return -ENOMEM; mutex_init(&priv->cfg_lock); if (quirk && quirk->port_probe) quirk->port_probe(priv); usb_set_serial_port_data(port, priv); result = ftdi_determine_type(port); if (result) goto err_free; ftdi_set_max_packet_size(port); if (read_latency_timer(port) < 0) priv->latency = 16; write_latency_timer(port); result = ftdi_gpio_init(port); if (result < 0) { dev_err(&port->serial->interface->dev, "GPIO initialisation failed: %d\n", result); } return 0; err_free: kfree(priv); return result; } /* Setup for the USB-UIRT device, which requires hardwired * baudrate (38400 gets mapped to 312500) */ /* Called from usbserial:serial_probe */ static void ftdi_USB_UIRT_setup(struct ftdi_private *priv) { priv->flags |= ASYNC_SPD_CUST; priv->custom_divisor = 77; priv->force_baud = 38400; } /* Setup for the HE-TIRA1 device, which requires hardwired * baudrate (38400 gets mapped to 100000) and RTS-CTS enabled. */ static void ftdi_HE_TIRA1_setup(struct ftdi_private *priv) { priv->flags |= ASYNC_SPD_CUST; priv->custom_divisor = 240; priv->force_baud = 38400; priv->force_rtscts = 1; } /* * Module parameter to control latency timer for NDI FTDI-based USB devices. * If this value is not set in /etc/modprobe.d/ its value will be set * to 1ms. */ static int ndi_latency_timer = 1; /* Setup for the NDI FTDI-based USB devices, which requires hardwired * baudrate (19200 gets mapped to 1200000). * * Called from usbserial:serial_probe. */ static int ftdi_NDI_device_setup(struct usb_serial *serial) { struct usb_device *udev = serial->dev; int latency = ndi_latency_timer; if (latency == 0) latency = 1; if (latency > 99) latency = 99; dev_dbg(&udev->dev, "%s setting NDI device latency to %d\n", __func__, latency); dev_info(&udev->dev, "NDI device with a latency value of %d\n", latency); /* FIXME: errors are not returned */ usb_control_msg(udev, usb_sndctrlpipe(udev, 0), FTDI_SIO_SET_LATENCY_TIMER_REQUEST, FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE, latency, 0, NULL, 0, WDR_TIMEOUT); return 0; } /* * First port on JTAG adaptors such as Olimex arm-usb-ocd or the FIC/OpenMoko * Neo1973 Debug Board is reserved for JTAG interface and can be accessed from * userspace using openocd. */ static int ftdi_jtag_probe(struct usb_serial *serial) { struct usb_interface *intf = serial->interface; int ifnum = intf->cur_altsetting->desc.bInterfaceNumber; if (ifnum == 0) { dev_info(&intf->dev, "Ignoring interface reserved for JTAG\n"); return -ENODEV; } return 0; } static int ftdi_8u2232c_probe(struct usb_serial *serial) { struct usb_device *udev = serial->dev; if (udev->manufacturer && !strcmp(udev->manufacturer, "CALAO Systems")) return ftdi_jtag_probe(serial); if (udev->product && (!strcmp(udev->product, "Arrow USB Blaster") || !strcmp(udev->product, "BeagleBone/XDS100V2") || !strcmp(udev->product, "SNAP Connect E10"))) return ftdi_jtag_probe(serial); return 0; } /* * First two ports on JTAG adaptors using an FT4232 such as STMicroelectronics's * ST Micro Connect Lite are reserved for JTAG or other non-UART interfaces and * can be accessed from userspace. * The next two ports are enabled as UARTs by default, where port 2 is * a conventional RS-232 UART. */ static int ftdi_stmclite_probe(struct usb_serial *serial) { struct usb_interface *intf = serial->interface; int ifnum = intf->cur_altsetting->desc.bInterfaceNumber; if (ifnum < 2) { dev_info(&intf->dev, "Ignoring interface reserved for JTAG\n"); return -ENODEV; } return 0; } static void ftdi_port_remove(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); ftdi_gpio_remove(port); kfree(priv); } static int ftdi_open(struct tty_struct *tty, struct usb_serial_port *port) { struct usb_device *dev = port->serial->dev; struct ftdi_private *priv = usb_get_serial_port_data(port); /* No error checking for this (will get errors later anyway) */ /* See ftdi_sio.h for description of what is reset */ usb_control_msg(dev, usb_sndctrlpipe(dev, 0), FTDI_SIO_RESET_REQUEST, FTDI_SIO_RESET_REQUEST_TYPE, FTDI_SIO_RESET_SIO, priv->channel, NULL, 0, WDR_TIMEOUT); /* Termios defaults are set by usb_serial_init. We don't change port->tty->termios - this would lose speed settings, etc. This is same behaviour as serial.c/rs_open() - Kuba */ /* ftdi_set_termios will send usb control messages */ if (tty) ftdi_set_termios(tty, port, NULL); return usb_serial_generic_open(tty, port); } static void ftdi_dtr_rts(struct usb_serial_port *port, int on) { struct ftdi_private *priv = usb_get_serial_port_data(port); /* Disable flow control */ if (!on) { if (usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), FTDI_SIO_SET_FLOW_CTRL_REQUEST, FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, 0, priv->channel, NULL, 0, WDR_TIMEOUT) < 0) { dev_err(&port->dev, "error from flowcontrol urb\n"); } } /* drop RTS and DTR */ if (on) set_mctrl(port, TIOCM_DTR | TIOCM_RTS); else clear_mctrl(port, TIOCM_DTR | TIOCM_RTS); } /* The SIO requires the first byte to have: * B0 1 * B1 0 * B2..7 length of message excluding byte 0 * * The new devices do not require this byte */ static int ftdi_prepare_write_buffer(struct usb_serial_port *port, void *dest, size_t size) { struct ftdi_private *priv; int count; unsigned long flags; priv = usb_get_serial_port_data(port); if (priv->chip_type == SIO) { unsigned char *buffer = dest; int i, len, c; count = 0; spin_lock_irqsave(&port->lock, flags); for (i = 0; i < size - 1; i += priv->max_packet_size) { len = min_t(int, size - i, priv->max_packet_size) - 1; c = kfifo_out(&port->write_fifo, &buffer[i + 1], len); if (!c) break; port->icount.tx += c; buffer[i] = (c << 2) + 1; count += c + 1; } spin_unlock_irqrestore(&port->lock, flags); } else { count = kfifo_out_locked(&port->write_fifo, dest, size, &port->lock); port->icount.tx += count; } return count; } #define FTDI_RS_ERR_MASK (FTDI_RS_BI | FTDI_RS_PE | FTDI_RS_FE | FTDI_RS_OE) static int ftdi_process_packet(struct usb_serial_port *port, struct ftdi_private *priv, unsigned char *buf, int len) { unsigned char status; bool brkint = false; int i; char flag; if (len < 2) { dev_dbg(&port->dev, "malformed packet\n"); return 0; } /* Compare new line status to the old one, signal if different/ N.B. packet may be processed more than once, but differences are only processed once. */ status = buf[0] & FTDI_STATUS_B0_MASK; if (status != priv->prev_status) { char diff_status = status ^ priv->prev_status; if (diff_status & FTDI_RS0_CTS) port->icount.cts++; if (diff_status & FTDI_RS0_DSR) port->icount.dsr++; if (diff_status & FTDI_RS0_RI) port->icount.rng++; if (diff_status & FTDI_RS0_RLSD) { struct tty_struct *tty; port->icount.dcd++; tty = tty_port_tty_get(&port->port); if (tty) usb_serial_handle_dcd_change(port, tty, status & FTDI_RS0_RLSD); tty_kref_put(tty); } wake_up_interruptible(&port->port.delta_msr_wait); priv->prev_status = status; } /* save if the transmitter is empty or not */ if (buf[1] & FTDI_RS_TEMT) priv->transmit_empty = 1; else priv->transmit_empty = 0; if (len == 2) return 0; /* status only */ /* * Break and error status must only be processed for packets with * data payload to avoid over-reporting. */ flag = TTY_NORMAL; if (buf[1] & FTDI_RS_ERR_MASK) { /* * Break takes precedence over parity, which takes precedence * over framing errors. Note that break is only associated * with the last character in the buffer and only when it's a * NUL. */ if (buf[1] & FTDI_RS_BI && buf[len - 1] == '\0') { port->icount.brk++; brkint = true; } if (buf[1] & FTDI_RS_PE) { flag = TTY_PARITY; port->icount.parity++; } else if (buf[1] & FTDI_RS_FE) { flag = TTY_FRAME; port->icount.frame++; } /* Overrun is special, not associated with a char */ if (buf[1] & FTDI_RS_OE) { port->icount.overrun++; tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } } port->icount.rx += len - 2; if (brkint || port->sysrq) { for (i = 2; i < len; i++) { if (brkint && i == len - 1) { if (usb_serial_handle_break(port)) return len - 3; flag = TTY_BREAK; } if (usb_serial_handle_sysrq_char(port, buf[i])) continue; tty_insert_flip_char(&port->port, buf[i], flag); } } else { tty_insert_flip_string_fixed_flag(&port->port, buf + 2, flag, len - 2); } return len - 2; } static void ftdi_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; struct ftdi_private *priv = usb_get_serial_port_data(port); char *data = urb->transfer_buffer; int i; int len; int count = 0; for (i = 0; i < urb->actual_length; i += priv->max_packet_size) { len = min_t(int, urb->actual_length - i, priv->max_packet_size); count += ftdi_process_packet(port, priv, &data[i], len); } if (count) tty_flip_buffer_push(&port->port); } static int ftdi_break_ctl(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct ftdi_private *priv = usb_get_serial_port_data(port); u16 value; int ret; /* break_state = -1 to turn on break, and 0 to turn off break */ /* see drivers/char/tty_io.c to see it used */ /* last_set_data_value NEVER has the break bit set in it */ if (break_state) value = priv->last_set_data_value | FTDI_SIO_SET_BREAK; else value = priv->last_set_data_value; ret = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), FTDI_SIO_SET_DATA_REQUEST, FTDI_SIO_SET_DATA_REQUEST_TYPE, value, priv->channel, NULL, 0, WDR_TIMEOUT); if (ret < 0) { dev_err(&port->dev, "%s FAILED to enable/disable break state (state was %d)\n", __func__, break_state); return ret; } dev_dbg(&port->dev, "%s break state is %d - urb is %d\n", __func__, break_state, value); return 0; } static bool ftdi_tx_empty(struct usb_serial_port *port) { unsigned char buf[2]; int ret; ret = ftdi_get_modem_status(port, buf); if (ret == 2) { if (!(buf[1] & FTDI_RS_TEMT)) return false; } return true; } /* old_termios contains the original termios settings and tty->termios contains * the new setting to be used * WARNING: set_termios calls this with old_termios in kernel space */ static void ftdi_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct usb_device *dev = port->serial->dev; struct device *ddev = &port->dev; struct ftdi_private *priv = usb_get_serial_port_data(port); struct ktermios *termios = &tty->termios; unsigned int cflag; u16 value, index; int ret; /* Force baud rate if this device requires it, unless it is set to B0. */ if (priv->force_baud && ((termios->c_cflag & CBAUD) != B0)) { dev_dbg(ddev, "%s: forcing baud rate for this device\n", __func__); tty_encode_baud_rate(tty, priv->force_baud, priv->force_baud); } /* Force RTS-CTS if this device requires it. */ if (priv->force_rtscts) { dev_dbg(ddev, "%s: forcing rtscts for this device\n", __func__); termios->c_cflag |= CRTSCTS; } /* * All FTDI UART chips are limited to CS7/8. We shouldn't pretend to * support CS5/6 and revert the CSIZE setting instead. * * CS5 however is used to control some smartcard readers which abuse * this limitation to switch modes. Original FTDI chips fall back to * eight data bits. * * TODO: Implement a quirk to only allow this with mentioned * readers. One I know of (Argolis Smartreader V1) * returns "USB smartcard server" as iInterface string. * The vendor didn't bother with a custom VID/PID of * course. */ if (C_CSIZE(tty) == CS6) { dev_warn(ddev, "requested CSIZE setting not supported\n"); termios->c_cflag &= ~CSIZE; if (old_termios) termios->c_cflag |= old_termios->c_cflag & CSIZE; else termios->c_cflag |= CS8; } cflag = termios->c_cflag; if (!old_termios) goto no_skip; if (old_termios->c_cflag == termios->c_cflag && old_termios->c_ispeed == termios->c_ispeed && old_termios->c_ospeed == termios->c_ospeed) goto no_c_cflag_changes; /* NOTE These routines can get interrupted by ftdi_sio_read_bulk_callback - need to examine what this means - don't see any problems yet */ if ((old_termios->c_cflag & (CSIZE|PARODD|PARENB|CMSPAR|CSTOPB)) == (termios->c_cflag & (CSIZE|PARODD|PARENB|CMSPAR|CSTOPB))) goto no_data_parity_stop_changes; no_skip: /* Set number of data bits, parity, stop bits */ value = 0; value |= (cflag & CSTOPB ? FTDI_SIO_SET_DATA_STOP_BITS_2 : FTDI_SIO_SET_DATA_STOP_BITS_1); if (cflag & PARENB) { if (cflag & CMSPAR) value |= cflag & PARODD ? FTDI_SIO_SET_DATA_PARITY_MARK : FTDI_SIO_SET_DATA_PARITY_SPACE; else value |= cflag & PARODD ? FTDI_SIO_SET_DATA_PARITY_ODD : FTDI_SIO_SET_DATA_PARITY_EVEN; } else { value |= FTDI_SIO_SET_DATA_PARITY_NONE; } switch (cflag & CSIZE) { case CS5: dev_dbg(ddev, "Setting CS5 quirk\n"); break; case CS7: value |= 7; dev_dbg(ddev, "Setting CS7\n"); break; default: case CS8: value |= 8; dev_dbg(ddev, "Setting CS8\n"); break; } /* This is needed by the break command since it uses the same command - but is or'ed with this value */ priv->last_set_data_value = value; if (usb_control_msg(dev, usb_sndctrlpipe(dev, 0), FTDI_SIO_SET_DATA_REQUEST, FTDI_SIO_SET_DATA_REQUEST_TYPE, value, priv->channel, NULL, 0, WDR_SHORT_TIMEOUT) < 0) { dev_err(ddev, "%s FAILED to set databits/stopbits/parity\n", __func__); } /* Now do the baudrate */ no_data_parity_stop_changes: if ((cflag & CBAUD) == B0) { /* Disable flow control */ if (usb_control_msg(dev, usb_sndctrlpipe(dev, 0), FTDI_SIO_SET_FLOW_CTRL_REQUEST, FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, 0, priv->channel, NULL, 0, WDR_TIMEOUT) < 0) { dev_err(ddev, "%s error from disable flowcontrol urb\n", __func__); } /* Drop RTS and DTR */ clear_mctrl(port, TIOCM_DTR | TIOCM_RTS); } else { /* set the baudrate determined before */ mutex_lock(&priv->cfg_lock); if (change_speed(tty, port)) dev_err(ddev, "%s urb failed to set baudrate\n", __func__); mutex_unlock(&priv->cfg_lock); /* Ensure RTS and DTR are raised when baudrate changed from 0 */ if (old_termios && (old_termios->c_cflag & CBAUD) == B0) set_mctrl(port, TIOCM_DTR | TIOCM_RTS); } no_c_cflag_changes: /* Set hardware-assisted flow control */ value = 0; if (C_CRTSCTS(tty)) { dev_dbg(&port->dev, "enabling rts/cts flow control\n"); index = FTDI_SIO_RTS_CTS_HS; } else if (I_IXON(tty)) { dev_dbg(&port->dev, "enabling xon/xoff flow control\n"); index = FTDI_SIO_XON_XOFF_HS; value = STOP_CHAR(tty) << 8 | START_CHAR(tty); } else { dev_dbg(&port->dev, "disabling flow control\n"); index = FTDI_SIO_DISABLE_FLOW_CTRL; } index |= priv->channel; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), FTDI_SIO_SET_FLOW_CTRL_REQUEST, FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, value, index, NULL, 0, WDR_TIMEOUT); if (ret < 0) dev_err(&port->dev, "failed to set flow control: %d\n", ret); } /* * Get modem-control status. * * Returns the number of status bytes retrieved (device dependant), or * negative error code. */ static int ftdi_get_modem_status(struct usb_serial_port *port, unsigned char status[2]) { struct ftdi_private *priv = usb_get_serial_port_data(port); unsigned char *buf; int len; int ret; buf = kmalloc(2, GFP_KERNEL); if (!buf) return -ENOMEM; /* * The device returns a two byte value (the SIO a 1 byte value) in the * same format as the data returned from the IN endpoint. */ if (priv->chip_type == SIO) len = 1; else len = 2; ret = usb_control_msg(port->serial->dev, usb_rcvctrlpipe(port->serial->dev, 0), FTDI_SIO_GET_MODEM_STATUS_REQUEST, FTDI_SIO_GET_MODEM_STATUS_REQUEST_TYPE, 0, priv->channel, buf, len, WDR_TIMEOUT); /* NOTE: We allow short responses and handle that below. */ if (ret < 1) { dev_err(&port->dev, "failed to get modem status: %d\n", ret); if (ret >= 0) ret = -EIO; ret = usb_translate_errors(ret); goto out; } status[0] = buf[0]; if (ret > 1) status[1] = buf[1]; else status[1] = 0; dev_dbg(&port->dev, "%s - 0x%02x%02x\n", __func__, status[0], status[1]); out: kfree(buf); return ret; } static int ftdi_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ftdi_private *priv = usb_get_serial_port_data(port); unsigned char buf[2]; int ret; ret = ftdi_get_modem_status(port, buf); if (ret < 0) return ret; ret = (buf[0] & FTDI_SIO_DSR_MASK ? TIOCM_DSR : 0) | (buf[0] & FTDI_SIO_CTS_MASK ? TIOCM_CTS : 0) | (buf[0] & FTDI_SIO_RI_MASK ? TIOCM_RI : 0) | (buf[0] & FTDI_SIO_RLSD_MASK ? TIOCM_CD : 0) | priv->last_dtr_rts; return ret; } static int ftdi_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; return update_mctrl(port, set, clear); } static int ftdi_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct usb_serial_port *port = tty->driver_data; void __user *argp = (void __user *)arg; switch (cmd) { case TIOCSERGETLSR: return get_lsr_info(port, argp); default: break; } return -ENOIOCTLCMD; } static struct usb_serial_driver ftdi_device = { .driver = { .name = "ftdi_sio", .dev_groups = ftdi_groups, }, .description = "FTDI USB Serial Device", .id_table = id_table_combined, .num_ports = 1, .bulk_in_size = 512, .bulk_out_size = 256, .probe = ftdi_probe, .port_probe = ftdi_port_probe, .port_remove = ftdi_port_remove, .open = ftdi_open, .dtr_rts = ftdi_dtr_rts, .throttle = usb_serial_generic_throttle, .unthrottle = usb_serial_generic_unthrottle, .process_read_urb = ftdi_process_read_urb, .prepare_write_buffer = ftdi_prepare_write_buffer, .tiocmget = ftdi_tiocmget, .tiocmset = ftdi_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, .ioctl = ftdi_ioctl, .get_serial = get_serial_info, .set_serial = set_serial_info, .set_termios = ftdi_set_termios, .break_ctl = ftdi_break_ctl, .tx_empty = ftdi_tx_empty, }; static struct usb_serial_driver * const serial_drivers[] = { &ftdi_device, NULL }; module_usb_serial_driver(serial_drivers, id_table_combined); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); module_param(ndi_latency_timer, int, 0644); MODULE_PARM_DESC(ndi_latency_timer, "NDI device latency timer override");
4 14 10 5 87 79 79 11 5 45 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_VIRTIO_VSOCK_H #define _LINUX_VIRTIO_VSOCK_H #include <uapi/linux/virtio_vsock.h> #include <linux/socket.h> #include <net/sock.h> #include <net/af_vsock.h> #define VIRTIO_VSOCK_SKB_HEADROOM (sizeof(struct virtio_vsock_hdr)) struct virtio_vsock_skb_cb { bool reply; bool tap_delivered; u32 offset; }; #define VIRTIO_VSOCK_SKB_CB(skb) ((struct virtio_vsock_skb_cb *)((skb)->cb)) static inline struct virtio_vsock_hdr *virtio_vsock_hdr(struct sk_buff *skb) { return (struct virtio_vsock_hdr *)skb->head; } static inline bool virtio_vsock_skb_reply(struct sk_buff *skb) { return VIRTIO_VSOCK_SKB_CB(skb)->reply; } static inline void virtio_vsock_skb_set_reply(struct sk_buff *skb) { VIRTIO_VSOCK_SKB_CB(skb)->reply = true; } static inline bool virtio_vsock_skb_tap_delivered(struct sk_buff *skb) { return VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered; } static inline void virtio_vsock_skb_set_tap_delivered(struct sk_buff *skb) { VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered = true; } static inline void virtio_vsock_skb_clear_tap_delivered(struct sk_buff *skb) { VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered = false; } static inline void virtio_vsock_skb_put(struct sk_buff *skb, u32 len) { DEBUG_NET_WARN_ON_ONCE(skb->len); if (skb_is_nonlinear(skb)) skb->len = len; else skb_put(skb, len); } static inline struct sk_buff * __virtio_vsock_alloc_skb_with_frags(unsigned int header_len, unsigned int data_len, gfp_t mask) { struct sk_buff *skb; int err; skb = alloc_skb_with_frags(header_len, data_len, PAGE_ALLOC_COSTLY_ORDER, &err, mask); if (!skb) return NULL; skb_reserve(skb, VIRTIO_VSOCK_SKB_HEADROOM); skb->data_len = data_len; return skb; } static inline struct sk_buff * virtio_vsock_alloc_linear_skb(unsigned int size, gfp_t mask) { return __virtio_vsock_alloc_skb_with_frags(size, 0, mask); } static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask) { if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) return virtio_vsock_alloc_linear_skb(size, mask); size -= VIRTIO_VSOCK_SKB_HEADROOM; return __virtio_vsock_alloc_skb_with_frags(VIRTIO_VSOCK_SKB_HEADROOM, size, mask); } static inline void virtio_vsock_skb_queue_head(struct sk_buff_head *list, struct sk_buff *skb) { spin_lock_bh(&list->lock); __skb_queue_head(list, skb); spin_unlock_bh(&list->lock); } static inline void virtio_vsock_skb_queue_tail(struct sk_buff_head *list, struct sk_buff *skb) { spin_lock_bh(&list->lock); __skb_queue_tail(list, skb); spin_unlock_bh(&list->lock); } static inline struct sk_buff *virtio_vsock_skb_dequeue(struct sk_buff_head *list) { struct sk_buff *skb; spin_lock_bh(&list->lock); skb = __skb_dequeue(list); spin_unlock_bh(&list->lock); return skb; } static inline void virtio_vsock_skb_queue_purge(struct sk_buff_head *list) { spin_lock_bh(&list->lock); __skb_queue_purge(list); spin_unlock_bh(&list->lock); } static inline size_t virtio_vsock_skb_len(struct sk_buff *skb) { return (size_t)(skb_end_pointer(skb) - skb->head); } /* Dimension the RX SKB so that the entire thing fits exactly into * a single 4KiB page. This avoids wasting memory due to alloc_skb() * rounding up to the next page order and also means that we * don't leave higher-order pages sitting around in the RX queue. */ #define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE SKB_WITH_OVERHEAD(1024 * 4) #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) enum { VSOCK_VQ_RX = 0, /* for host to guest data */ VSOCK_VQ_TX = 1, /* for guest to host data */ VSOCK_VQ_EVENT = 2, VSOCK_VQ_MAX = 3, }; /* Per-socket state (accessed via vsk->trans) */ struct virtio_vsock_sock { struct vsock_sock *vsk; spinlock_t tx_lock; spinlock_t rx_lock; /* Protected by tx_lock */ u32 tx_cnt; u32 peer_fwd_cnt; u32 peer_buf_alloc; size_t bytes_unsent; /* Protected by rx_lock */ u32 fwd_cnt; u32 last_fwd_cnt; u32 rx_bytes; u32 buf_alloc; u32 buf_used; struct sk_buff_head rx_queue; u32 msg_count; }; struct virtio_vsock_pkt_info { u32 remote_cid, remote_port; struct vsock_sock *vsk; struct msghdr *msg; u32 pkt_len; u16 type; u16 op; u32 flags; bool reply; }; struct virtio_transport { /* This must be the first field */ struct vsock_transport transport; /* Takes ownership of the packet */ int (*send_pkt)(struct sk_buff *skb); /* Used in MSG_ZEROCOPY mode. Checks, that provided data * (number of buffers) could be transmitted with zerocopy * mode. If this callback is not implemented for the current * transport - this means that this transport doesn't need * extra checks and can perform zerocopy transmission by * default. */ bool (*can_msgzerocopy)(int bufs_num); }; ssize_t virtio_transport_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg, size_t len, int type); int virtio_transport_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg, size_t len, int flags); int virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, struct msghdr *msg, size_t len); ssize_t virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, struct msghdr *msg, int flags); s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk); ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk); void virtio_transport_consume_skb_sent(struct sk_buff *skb, bool consume); int virtio_transport_do_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk); int virtio_transport_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *data_ready_now); int virtio_transport_notify_poll_out(struct vsock_sock *vsk, size_t target, bool *space_available_now); int virtio_transport_notify_recv_init(struct vsock_sock *vsk, size_t target, struct vsock_transport_recv_notify_data *data); int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, size_t target, struct vsock_transport_recv_notify_data *data); int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, size_t target, struct vsock_transport_recv_notify_data *data); int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, size_t target, ssize_t copied, bool data_read, struct vsock_transport_recv_notify_data *data); int virtio_transport_notify_send_init(struct vsock_sock *vsk, struct vsock_transport_send_notify_data *data); int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, struct vsock_transport_send_notify_data *data); int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, struct vsock_transport_send_notify_data *data); int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written, struct vsock_transport_send_notify_data *data); void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val); u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk); bool virtio_transport_stream_is_active(struct vsock_sock *vsk); bool virtio_transport_stream_allow(u32 cid, u32 port); int virtio_transport_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr); bool virtio_transport_dgram_allow(u32 cid, u32 port); int virtio_transport_connect(struct vsock_sock *vsk); int virtio_transport_shutdown(struct vsock_sock *vsk, int mode); void virtio_transport_release(struct vsock_sock *vsk); ssize_t virtio_transport_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg, size_t len); int virtio_transport_dgram_enqueue(struct vsock_sock *vsk, struct sockaddr_vm *remote_addr, struct msghdr *msg, size_t len); void virtio_transport_destruct(struct vsock_sock *vsk); void virtio_transport_recv_pkt(struct virtio_transport *t, struct sk_buff *skb); void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb); u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); void virtio_transport_deliver_tap_pkt(struct sk_buff *skb); int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *list); int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t read_actor); int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val); #endif /* _LINUX_VIRTIO_VSOCK_H */
1 3 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 // SPDX-License-Identifier: GPL-2.0-or-later /* * i2c support for Silicon Labs' CP2615 Digital Audio Bridge * * (c) 2021, Bence Csókás <bence98@sch.bme.hu> */ #include <linux/errno.h> #include <linux/i2c.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/string.h> #include <linux/usb.h> /** CP2615 I/O Protocol implementation */ #define CP2615_VID 0x10c4 #define CP2615_PID 0xeac1 #define IOP_EP_IN 0x82 #define IOP_EP_OUT 0x02 #define IOP_IFN 1 #define IOP_ALTSETTING 2 #define MAX_IOP_SIZE 64 #define MAX_IOP_PAYLOAD_SIZE (MAX_IOP_SIZE - 6) #define MAX_I2C_SIZE (MAX_IOP_PAYLOAD_SIZE - 4) enum cp2615_iop_msg_type { iop_GetAccessoryInfo = 0xD100, iop_AccessoryInfo = 0xA100, iop_GetPortConfiguration = 0xD203, iop_PortConfiguration = 0xA203, iop_DoI2cTransfer = 0xD400, iop_I2cTransferResult = 0xA400, iop_GetSerialState = 0xD501, iop_SerialState = 0xA501 }; struct __packed cp2615_iop_msg { __be16 preamble, length, msg; u8 data[MAX_IOP_PAYLOAD_SIZE]; }; #define PART_ID_A01 0x1400 #define PART_ID_A02 0x1500 struct __packed cp2615_iop_accessory_info { __be16 part_id, option_id, proto_ver; }; struct __packed cp2615_i2c_transfer { u8 tag, i2caddr, read_len, write_len; u8 data[MAX_I2C_SIZE]; }; /* Possible values for struct cp2615_i2c_transfer_result.status */ enum cp2615_i2c_status { /* Writing to the internal EEPROM failed, because it is locked */ CP2615_CFG_LOCKED = -6, /* read_len or write_len out of range */ CP2615_INVALID_PARAM = -4, /* I2C target did not ACK in time */ CP2615_TIMEOUT, /* I2C bus busy */ CP2615_BUS_BUSY, /* I2C bus error (ie. target NAK'd the request) */ CP2615_BUS_ERROR, CP2615_SUCCESS }; struct __packed cp2615_i2c_transfer_result { u8 tag, i2caddr; s8 status; u8 read_len; u8 data[MAX_I2C_SIZE]; }; static int cp2615_init_iop_msg(struct cp2615_iop_msg *ret, enum cp2615_iop_msg_type msg, const void *data, size_t data_len) { if (data_len > MAX_IOP_PAYLOAD_SIZE) return -EFBIG; if (!ret) return -EINVAL; ret->preamble = htons(0x2A2AU); ret->length = htons(data_len + 6); ret->msg = htons(msg); if (data && data_len) memcpy(&ret->data, data, data_len); return 0; } static int cp2615_init_i2c_msg(struct cp2615_iop_msg *ret, const struct cp2615_i2c_transfer *data) { return cp2615_init_iop_msg(ret, iop_DoI2cTransfer, data, 4 + data->write_len); } /* Translates status codes to Linux errno's */ static int cp2615_check_status(enum cp2615_i2c_status status) { switch (status) { case CP2615_SUCCESS: return 0; case CP2615_BUS_ERROR: return -ENXIO; case CP2615_BUS_BUSY: return -EAGAIN; case CP2615_TIMEOUT: return -ETIMEDOUT; case CP2615_INVALID_PARAM: return -EINVAL; case CP2615_CFG_LOCKED: return -EPERM; } /* Unknown error code */ return -EPROTO; } /** Driver code */ static int cp2615_i2c_send(struct usb_interface *usbif, struct cp2615_i2c_transfer *i2c_w) { struct cp2615_iop_msg *msg = kzalloc(sizeof(*msg), GFP_KERNEL); struct usb_device *usbdev = interface_to_usbdev(usbif); int res = cp2615_init_i2c_msg(msg, i2c_w); if (!res) res = usb_bulk_msg(usbdev, usb_sndbulkpipe(usbdev, IOP_EP_OUT), msg, ntohs(msg->length), NULL, 0); kfree(msg); return res; } static int cp2615_i2c_recv(struct usb_interface *usbif, unsigned char tag, void *buf) { struct usb_device *usbdev = interface_to_usbdev(usbif); struct cp2615_iop_msg *msg; struct cp2615_i2c_transfer_result *i2c_r; int res; msg = kzalloc(sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; res = usb_bulk_msg(usbdev, usb_rcvbulkpipe(usbdev, IOP_EP_IN), msg, sizeof(struct cp2615_iop_msg), NULL, 0); if (res < 0) { kfree(msg); return res; } i2c_r = (struct cp2615_i2c_transfer_result *)&msg->data; if (msg->msg != htons(iop_I2cTransferResult) || i2c_r->tag != tag) { kfree(msg); return -EIO; } res = cp2615_check_status(i2c_r->status); if (!res) memcpy(buf, &i2c_r->data, i2c_r->read_len); kfree(msg); return res; } /* Checks if the IOP is functional by querying the part's ID */ static int cp2615_check_iop(struct usb_interface *usbif) { struct cp2615_iop_msg *msg = kzalloc(sizeof(*msg), GFP_KERNEL); struct cp2615_iop_accessory_info *info = (struct cp2615_iop_accessory_info *)&msg->data; struct usb_device *usbdev = interface_to_usbdev(usbif); int res = cp2615_init_iop_msg(msg, iop_GetAccessoryInfo, NULL, 0); if (res) goto out; res = usb_bulk_msg(usbdev, usb_sndbulkpipe(usbdev, IOP_EP_OUT), msg, ntohs(msg->length), NULL, 0); if (res) goto out; res = usb_bulk_msg(usbdev, usb_rcvbulkpipe(usbdev, IOP_EP_IN), msg, sizeof(struct cp2615_iop_msg), NULL, 0); if (res) goto out; if (msg->msg != htons(iop_AccessoryInfo)) { res = -EIO; goto out; } switch (ntohs(info->part_id)) { case PART_ID_A01: dev_dbg(&usbif->dev, "Found A01 part. (WARNING: errata exists!)\n"); break; case PART_ID_A02: dev_dbg(&usbif->dev, "Found good A02 part.\n"); break; default: dev_warn(&usbif->dev, "Unknown part ID %04X\n", ntohs(info->part_id)); } out: kfree(msg); return res; } static int cp2615_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { struct usb_interface *usbif = adap->algo_data; int i = 0, ret = 0; struct i2c_msg *msg; struct cp2615_i2c_transfer i2c_w = {0}; dev_dbg(&usbif->dev, "Doing %d I2C transactions\n", num); for (; !ret && i < num; i++) { msg = &msgs[i]; i2c_w.tag = 0xdd; i2c_w.i2caddr = i2c_8bit_addr_from_msg(msg); if (msg->flags & I2C_M_RD) { i2c_w.read_len = msg->len; i2c_w.write_len = 0; } else { i2c_w.read_len = 0; i2c_w.write_len = msg->len; memcpy(&i2c_w.data, msg->buf, i2c_w.write_len); } ret = cp2615_i2c_send(usbif, &i2c_w); if (ret) break; ret = cp2615_i2c_recv(usbif, i2c_w.tag, msg->buf); } if (ret < 0) return ret; return i; } static u32 cp2615_i2c_func(struct i2c_adapter *adap) { return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } static const struct i2c_algorithm cp2615_i2c_algo = { .xfer = cp2615_i2c_xfer, .functionality = cp2615_i2c_func, }; /* * This chip has some limitations: one is that the USB endpoint * can only receive 64 bytes/transfer, that leaves 54 bytes for * the I2C transfer. On top of that, EITHER read_len OR write_len * may be zero, but not both. If both are non-zero, the adapter * issues a write followed by a read. And the chip does not * support repeated START between the write and read phases. */ static struct i2c_adapter_quirks cp2615_i2c_quirks = { .max_write_len = MAX_I2C_SIZE, .max_read_len = MAX_I2C_SIZE, .flags = I2C_AQ_COMB_WRITE_THEN_READ | I2C_AQ_NO_ZERO_LEN | I2C_AQ_NO_REP_START, .max_comb_1st_msg_len = MAX_I2C_SIZE, .max_comb_2nd_msg_len = MAX_I2C_SIZE }; static void cp2615_i2c_remove(struct usb_interface *usbif) { struct i2c_adapter *adap = usb_get_intfdata(usbif); usb_set_intfdata(usbif, NULL); i2c_del_adapter(adap); } static int cp2615_i2c_probe(struct usb_interface *usbif, const struct usb_device_id *id) { int ret = 0; struct i2c_adapter *adap; struct usb_device *usbdev = interface_to_usbdev(usbif); ret = usb_set_interface(usbdev, IOP_IFN, IOP_ALTSETTING); if (ret) return ret; ret = cp2615_check_iop(usbif); if (ret) return ret; adap = devm_kzalloc(&usbif->dev, sizeof(struct i2c_adapter), GFP_KERNEL); if (!adap) return -ENOMEM; strscpy(adap->name, usbdev->serial, sizeof(adap->name)); adap->owner = THIS_MODULE; adap->dev.parent = &usbif->dev; adap->dev.of_node = usbif->dev.of_node; adap->timeout = HZ; adap->algo = &cp2615_i2c_algo; adap->quirks = &cp2615_i2c_quirks; adap->algo_data = usbif; ret = i2c_add_adapter(adap); if (ret) return ret; usb_set_intfdata(usbif, adap); return 0; } static const struct usb_device_id id_table[] = { { USB_DEVICE_INTERFACE_NUMBER(CP2615_VID, CP2615_PID, IOP_IFN) }, { } }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_driver cp2615_i2c_driver = { .name = "i2c-cp2615", .probe = cp2615_i2c_probe, .disconnect = cp2615_i2c_remove, .id_table = id_table, }; module_usb_driver(cp2615_i2c_driver); MODULE_AUTHOR("Bence Csókás <bence98@sch.bme.hu>"); MODULE_DESCRIPTION("CP2615 I2C bus driver"); MODULE_LICENSE("GPL");
3151 3151 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 // SPDX-License-Identifier: GPL-2.0 /* * This file contains functions which manage high resolution tick * related events. * * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner */ #include <linux/cpu.h> #include <linux/err.h> #include <linux/hrtimer.h> #include <linux/interrupt.h> #include <linux/percpu.h> #include <linux/profile.h> #include <linux/sched.h> #include "tick-internal.h" /** * tick_program_event - program the CPU local timer device for the next event */ int tick_program_event(ktime_t expires, int force) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); if (unlikely(expires == KTIME_MAX)) { /* * We don't need the clock event device any more, stop it. */ clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED); dev->next_event = KTIME_MAX; return 0; } if (unlikely(clockevent_state_oneshot_stopped(dev))) { /* * We need the clock event again, configure it in ONESHOT mode * before using it. */ clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); } return clockevents_program_event(dev, expires, force); } /** * tick_resume_oneshot - resume oneshot mode */ void tick_resume_oneshot(void) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(dev, ktime_get(), true); } /** * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz) */ void tick_setup_oneshot(struct clock_event_device *newdev, void (*handler)(struct clock_event_device *), ktime_t next_event) { newdev->event_handler = handler; clockevents_switch_state(newdev, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(newdev, next_event, true); } /** * tick_switch_to_oneshot - switch to oneshot mode */ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *dev = td->evtdev; if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) || !tick_device_is_functional(dev)) { pr_info("Clockevents: could not switch to one-shot mode:"); if (!dev) { pr_cont(" no tick device\n"); } else { if (!tick_device_is_functional(dev)) pr_cont(" %s is not functional.\n", dev->name); else pr_cont(" %s does not support one-shot mode.\n", dev->name); } return -EINVAL; } td->mode = TICKDEV_MODE_ONESHOT; dev->event_handler = handler; clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); tick_broadcast_switch_to_oneshot(); return 0; } /** * tick_oneshot_mode_active - check whether the system is in oneshot mode * * returns 1 when either nohz or highres are enabled. otherwise 0. */ int tick_oneshot_mode_active(void) { unsigned long flags; int ret; local_irq_save(flags); ret = __this_cpu_read(tick_cpu_device.mode) == TICKDEV_MODE_ONESHOT; local_irq_restore(flags); return ret; } #ifdef CONFIG_HIGH_RES_TIMERS /** * tick_init_highres - switch to high resolution mode * * Called with interrupts disabled. */ int tick_init_highres(void) { return tick_switch_to_oneshot(hrtimer_interrupt); } #endif
3 3 2 3 3 5 5 1 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 // SPDX-License-Identifier: GPL-2.0-only /* * driver for the i2c-tiny-usb adapter - 1.0 * http://www.harbaum.org/till/i2c_tiny_usb * * Copyright (C) 2006-2007 Till Harbaum (Till@Harbaum.org) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/string_choices.h> #include <linux/types.h> /* include interfaces to usb layer */ #include <linux/usb.h> /* include interface to i2c layer */ #include <linux/i2c.h> /* commands via USB, must match command ids in the firmware */ #define CMD_ECHO 0 #define CMD_GET_FUNC 1 #define CMD_SET_DELAY 2 #define CMD_GET_STATUS 3 #define CMD_I2C_IO 4 #define CMD_I2C_IO_BEGIN (1<<0) #define CMD_I2C_IO_END (1<<1) /* i2c bit delay, default is 10us -> 100kHz max (in practice, due to additional delays in the i2c bitbanging code this results in a i2c clock of about 50kHz) */ static unsigned short delay = 10; module_param(delay, ushort, 0); MODULE_PARM_DESC(delay, "bit delay in microseconds " "(default is 10us for 100kHz max)"); static int usb_read(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len); static int usb_write(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len); /* ----- begin of i2c layer ---------------------------------------------- */ #define STATUS_IDLE 0 #define STATUS_ADDRESS_ACK 1 #define STATUS_ADDRESS_NAK 2 static int usb_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) { unsigned char *pstatus; struct i2c_msg *pmsg; int i, ret; pstatus = kmalloc(sizeof(*pstatus), GFP_KERNEL); if (!pstatus) return -ENOMEM; for (i = 0 ; i < num ; i++) { int cmd = CMD_I2C_IO; if (i == 0) cmd |= CMD_I2C_IO_BEGIN; if (i == num-1) cmd |= CMD_I2C_IO_END; pmsg = &msgs[i]; dev_dbg(&adapter->dev, " %d: %s (flags %d) %d bytes to 0x%02x\n", i, str_read_write(pmsg->flags & I2C_M_RD), pmsg->flags, pmsg->len, pmsg->addr); /* and directly send the message */ if (pmsg->flags & I2C_M_RD) { /* read data */ if (usb_read(adapter, cmd, pmsg->flags, pmsg->addr, pmsg->buf, pmsg->len) != pmsg->len) { dev_err(&adapter->dev, "failure reading data\n"); ret = -EIO; goto out; } } else { /* write data */ if (usb_write(adapter, cmd, pmsg->flags, pmsg->addr, pmsg->buf, pmsg->len) != pmsg->len) { dev_err(&adapter->dev, "failure writing data\n"); ret = -EIO; goto out; } } /* read status */ if (usb_read(adapter, CMD_GET_STATUS, 0, 0, pstatus, 1) != 1) { dev_err(&adapter->dev, "failure reading status\n"); ret = -EIO; goto out; } dev_dbg(&adapter->dev, " status = %d\n", *pstatus); if (*pstatus == STATUS_ADDRESS_NAK) { ret = -ENXIO; goto out; } } ret = i; out: kfree(pstatus); return ret; } static u32 usb_func(struct i2c_adapter *adapter) { __le32 *pfunc; u32 ret; pfunc = kmalloc(sizeof(*pfunc), GFP_KERNEL); /* get functionality from adapter */ if (!pfunc || usb_read(adapter, CMD_GET_FUNC, 0, 0, pfunc, sizeof(*pfunc)) != sizeof(*pfunc)) { dev_err(&adapter->dev, "failure reading functionality\n"); ret = 0; goto out; } ret = le32_to_cpup(pfunc); out: kfree(pfunc); return ret; } /* prevent invalid 0-length usb_control_msg */ static const struct i2c_adapter_quirks usb_quirks = { .flags = I2C_AQ_NO_ZERO_LEN_READ, }; /* This is the actual algorithm we define */ static const struct i2c_algorithm usb_algorithm = { .xfer = usb_xfer, .functionality = usb_func, }; /* ----- end of i2c layer ------------------------------------------------ */ /* ----- begin of usb layer ---------------------------------------------- */ /* * Initially the usb i2c interface uses a vid/pid pair donated by * Future Technology Devices International Ltd., later a pair was * bought from EZPrototypes */ static const struct usb_device_id i2c_tiny_usb_table[] = { { USB_DEVICE(0x0403, 0xc631) }, /* FTDI */ { USB_DEVICE(0x1c40, 0x0534) }, /* EZPrototypes */ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, i2c_tiny_usb_table); /* Structure to hold all of our device specific stuff */ struct i2c_tiny_usb { struct usb_device *usb_dev; /* the usb device for this device */ struct usb_interface *interface; /* the interface for this device */ struct i2c_adapter adapter; /* i2c related things */ }; static int usb_read(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data; void *dmadata = kmalloc(len, GFP_KERNEL); int ret; if (!dmadata) return -ENOMEM; /* do control transfer */ ret = usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_IN, value, index, dmadata, len, 2000); memcpy(data, dmadata, len); kfree(dmadata); return ret; } static int usb_write(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data; void *dmadata = kmemdup(data, len, GFP_KERNEL); int ret; if (!dmadata) return -ENOMEM; /* do control transfer */ ret = usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE, value, index, dmadata, len, 2000); kfree(dmadata); return ret; } static void i2c_tiny_usb_free(struct i2c_tiny_usb *dev) { usb_put_dev(dev->usb_dev); kfree(dev); } static int i2c_tiny_usb_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct i2c_tiny_usb *dev; int retval = -ENOMEM; u16 version; if (interface->intf_assoc && interface->intf_assoc->bFunctionClass != USB_CLASS_VENDOR_SPEC) return -ENODEV; dev_dbg(&interface->dev, "probing usb device\n"); /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) goto error; dev->usb_dev = usb_get_dev(interface_to_usbdev(interface)); dev->interface = interface; /* save our data pointer in this interface device */ usb_set_intfdata(interface, dev); version = le16_to_cpu(dev->usb_dev->descriptor.bcdDevice); dev_info(&interface->dev, "version %x.%02x found at bus %03d address %03d\n", version >> 8, version & 0xff, dev->usb_dev->bus->busnum, dev->usb_dev->devnum); /* setup i2c adapter description */ dev->adapter.owner = THIS_MODULE; dev->adapter.class = I2C_CLASS_HWMON; dev->adapter.quirks = &usb_quirks; dev->adapter.algo = &usb_algorithm; dev->adapter.algo_data = dev; snprintf(dev->adapter.name, sizeof(dev->adapter.name), "i2c-tiny-usb at bus %03d device %03d", dev->usb_dev->bus->busnum, dev->usb_dev->devnum); if (usb_write(&dev->adapter, CMD_SET_DELAY, delay, 0, NULL, 0) != 0) { dev_err(&dev->adapter.dev, "failure setting delay to %dus\n", delay); retval = -EIO; goto error; } dev->adapter.dev.parent = &dev->interface->dev; /* and finally attach to i2c layer */ i2c_add_adapter(&dev->adapter); /* inform user about successful attachment to i2c layer */ dev_info(&dev->adapter.dev, "connected i2c-tiny-usb device\n"); return 0; error: if (dev) i2c_tiny_usb_free(dev); return retval; } static void i2c_tiny_usb_disconnect(struct usb_interface *interface) { struct i2c_tiny_usb *dev = usb_get_intfdata(interface); i2c_del_adapter(&dev->adapter); usb_set_intfdata(interface, NULL); i2c_tiny_usb_free(dev); dev_dbg(&interface->dev, "disconnected\n"); } static struct usb_driver i2c_tiny_usb_driver = { .name = "i2c-tiny-usb", .probe = i2c_tiny_usb_probe, .disconnect = i2c_tiny_usb_disconnect, .id_table = i2c_tiny_usb_table, }; module_usb_driver(i2c_tiny_usb_driver); /* ----- end of usb layer ------------------------------------------------ */ MODULE_AUTHOR("Till Harbaum <Till@Harbaum.org>"); MODULE_DESCRIPTION("i2c-tiny-usb driver v1.0"); MODULE_LICENSE("GPL");
32 32 42 1 1 37 1 2 2 37 5 5 34 1 32 1 12 20 32 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/file.h> #include <linux/io_uring/cmd.h> #include <linux/security.h> #include <linux/nospec.h> #include <uapi/linux/io_uring.h> #include "io_uring.h" #include "alloc_cache.h" #include "rsrc.h" #include "kbuf.h" #include "uring_cmd.h" #include "poll.h" void io_cmd_cache_free(const void *entry) { struct io_async_cmd *ac = (struct io_async_cmd *)entry; io_vec_free(&ac->vec); kfree(ac); } static void io_req_uring_cleanup(struct io_kiocb *req, unsigned int issue_flags) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); struct io_async_cmd *ac = req->async_data; if (issue_flags & IO_URING_F_UNLOCKED) return; io_alloc_cache_vec_kasan(&ac->vec); if (ac->vec.nr > IO_VEC_CACHE_SOFT_CAP) io_vec_free(&ac->vec); if (io_alloc_cache_put(&req->ctx->cmd_cache, ac)) { ioucmd->sqe = NULL; io_req_async_data_clear(req, REQ_F_NEED_CLEANUP); } } void io_uring_cmd_cleanup(struct io_kiocb *req) { io_req_uring_cleanup(req, 0); } bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx, struct io_uring_task *tctx, bool cancel_all) { struct hlist_node *tmp; struct io_kiocb *req; bool ret = false; lockdep_assert_held(&ctx->uring_lock); hlist_for_each_entry_safe(req, tmp, &ctx->cancelable_uring_cmd, hash_node) { struct io_uring_cmd *cmd = io_kiocb_to_cmd(req, struct io_uring_cmd); struct file *file = req->file; if (!cancel_all && req->tctx != tctx) continue; if (cmd->flags & IORING_URING_CMD_CANCELABLE) { file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL | IO_URING_F_COMPLETE_DEFER); ret = true; } } io_submit_flush_completions(ctx); return ret; } static void io_uring_cmd_del_cancelable(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct io_kiocb *req = cmd_to_io_kiocb(cmd); struct io_ring_ctx *ctx = req->ctx; if (!(cmd->flags & IORING_URING_CMD_CANCELABLE)) return; cmd->flags &= ~IORING_URING_CMD_CANCELABLE; io_ring_submit_lock(ctx, issue_flags); hlist_del(&req->hash_node); io_ring_submit_unlock(ctx, issue_flags); } /* * Mark this command as concelable, then io_uring_try_cancel_uring_cmd() * will try to cancel this issued command by sending ->uring_cmd() with * issue_flags of IO_URING_F_CANCEL. * * The command is guaranteed to not be done when calling ->uring_cmd() * with IO_URING_F_CANCEL, but it is driver's responsibility to deal * with race between io_uring canceling and normal completion. */ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct io_kiocb *req = cmd_to_io_kiocb(cmd); struct io_ring_ctx *ctx = req->ctx; if (!(cmd->flags & IORING_URING_CMD_CANCELABLE)) { cmd->flags |= IORING_URING_CMD_CANCELABLE; io_ring_submit_lock(ctx, issue_flags); hlist_add_head(&req->hash_node, &ctx->cancelable_uring_cmd); io_ring_submit_unlock(ctx, issue_flags); } } EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable); static void io_uring_cmd_work(struct io_kiocb *req, io_tw_token_t tw) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); unsigned int flags = IO_URING_F_COMPLETE_DEFER; if (io_should_terminate_tw(req->ctx)) flags |= IO_URING_F_TASK_DEAD; /* task_work executor checks the deffered list completion */ ioucmd->task_work_cb(ioucmd, flags); } void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, io_uring_cmd_tw_t task_work_cb, unsigned flags) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); if (WARN_ON_ONCE(req->flags & REQ_F_APOLL_MULTISHOT)) return; ioucmd->task_work_cb = task_work_cb; req->io_task_work.func = io_uring_cmd_work; __io_req_task_work_add(req, flags); } EXPORT_SYMBOL_GPL(__io_uring_cmd_do_in_task); static inline void io_req_set_cqe32_extra(struct io_kiocb *req, u64 extra1, u64 extra2) { req->big_cqe.extra1 = extra1; req->big_cqe.extra2 = extra2; } /* * Called by consumers of io_uring_cmd, if they originally returned * -EIOCBQUEUED upon receiving the command. */ void __io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret, u64 res2, unsigned issue_flags, bool is_cqe32) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); if (WARN_ON_ONCE(req->flags & REQ_F_APOLL_MULTISHOT)) return; io_uring_cmd_del_cancelable(ioucmd, issue_flags); if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); if (is_cqe32) { if (req->ctx->flags & IORING_SETUP_CQE_MIXED) req->cqe.flags |= IORING_CQE_F_32; io_req_set_cqe32_extra(req, res2, 0); } io_req_uring_cleanup(req, issue_flags); if (req->ctx->flags & IORING_SETUP_IOPOLL) { /* order with io_iopoll_req_issued() checking ->iopoll_complete */ smp_store_release(&req->iopoll_completed, 1); } else if (issue_flags & IO_URING_F_COMPLETE_DEFER) { if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED)) return; io_req_complete_defer(req); } else { req->io_task_work.func = io_req_task_complete; io_req_task_work_add(req); } } EXPORT_SYMBOL_GPL(__io_uring_cmd_done); int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); struct io_async_cmd *ac; if (sqe->__pad1) return -EINVAL; ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags); if (ioucmd->flags & ~IORING_URING_CMD_MASK) return -EINVAL; if (ioucmd->flags & IORING_URING_CMD_FIXED) { if (ioucmd->flags & IORING_URING_CMD_MULTISHOT) return -EINVAL; req->buf_index = READ_ONCE(sqe->buf_index); } if (!!(ioucmd->flags & IORING_URING_CMD_MULTISHOT) != !!(req->flags & REQ_F_BUFFER_SELECT)) return -EINVAL; ioucmd->cmd_op = READ_ONCE(sqe->cmd_op); ac = io_uring_alloc_async_data(&req->ctx->cmd_cache, req); if (!ac) return -ENOMEM; ioucmd->sqe = sqe; return 0; } void io_uring_cmd_sqe_copy(struct io_kiocb *req) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); struct io_async_cmd *ac = req->async_data; /* Should not happen, as REQ_F_SQE_COPIED covers this */ if (WARN_ON_ONCE(ioucmd->sqe == ac->sqes)) return; memcpy(ac->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); ioucmd->sqe = ac->sqes; } int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); struct io_ring_ctx *ctx = req->ctx; struct file *file = req->file; int ret; if (!file->f_op->uring_cmd) return -EOPNOTSUPP; ret = security_uring_cmd(ioucmd); if (ret) return ret; if (ctx->flags & IORING_SETUP_SQE128) issue_flags |= IO_URING_F_SQE128; if (ctx->flags & (IORING_SETUP_CQE32 | IORING_SETUP_CQE_MIXED)) issue_flags |= IO_URING_F_CQE32; if (io_is_compat(ctx)) issue_flags |= IO_URING_F_COMPAT; if (ctx->flags & IORING_SETUP_IOPOLL) { if (!file->f_op->uring_cmd_iopoll) return -EOPNOTSUPP; issue_flags |= IO_URING_F_IOPOLL; req->iopoll_completed = 0; if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) { /* make sure every req only blocks once */ req->flags &= ~REQ_F_IOPOLL_STATE; req->iopoll_start = ktime_get_ns(); } } ret = file->f_op->uring_cmd(ioucmd, issue_flags); if (ioucmd->flags & IORING_URING_CMD_MULTISHOT) { if (ret >= 0) return IOU_ISSUE_SKIP_COMPLETE; } if (ret == -EAGAIN) { ioucmd->flags |= IORING_URING_CMD_REISSUE; return ret; } if (ret == -EIOCBQUEUED) return ret; if (ret < 0) req_set_fail(req); io_req_uring_cleanup(req, issue_flags); io_req_set_res(req, ret, 0); return IOU_COMPLETE; } int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, struct io_uring_cmd *ioucmd, unsigned int issue_flags) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); if (WARN_ON_ONCE(!(ioucmd->flags & IORING_URING_CMD_FIXED))) return -EINVAL; return io_import_reg_buf(req, iter, ubuf, len, rw, issue_flags); } EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed); int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, const struct iovec __user *uvec, size_t uvec_segs, int ddir, struct iov_iter *iter, unsigned issue_flags) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); struct io_async_cmd *ac = req->async_data; int ret; if (WARN_ON_ONCE(!(ioucmd->flags & IORING_URING_CMD_FIXED))) return -EINVAL; ret = io_prep_reg_iovec(req, &ac->vec, uvec, uvec_segs); if (ret) return ret; return io_import_reg_vec(ddir, iter, req, &ac->vec, uvec_segs, issue_flags); } EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed_vec); void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); io_req_queue_iowq(req); } int io_cmd_poll_multishot(struct io_uring_cmd *cmd, unsigned int issue_flags, __poll_t mask) { struct io_kiocb *req = cmd_to_io_kiocb(cmd); int ret; if (likely(req->flags & REQ_F_APOLL_MULTISHOT)) return 0; req->flags |= REQ_F_APOLL_MULTISHOT; mask &= ~EPOLLONESHOT; ret = io_arm_apoll(req, issue_flags, mask); return ret == IO_APOLL_OK ? -EIOCBQUEUED : -ECANCELED; } bool io_uring_cmd_post_mshot_cqe32(struct io_uring_cmd *cmd, unsigned int issue_flags, struct io_uring_cqe cqe[2]) { struct io_kiocb *req = cmd_to_io_kiocb(cmd); if (WARN_ON_ONCE(!(issue_flags & IO_URING_F_MULTISHOT))) return false; return io_req_post_cqe32(req, cqe); } /* * Work with io_uring_mshot_cmd_post_cqe() together for committing the * provided buffer upfront */ struct io_br_sel io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, unsigned buf_group, size_t *len, unsigned int issue_flags) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT)) return (struct io_br_sel) { .val = -EINVAL }; if (WARN_ON_ONCE(!io_do_buffer_select(req))) return (struct io_br_sel) { .val = -EINVAL }; return io_buffer_select(req, len, buf_group, issue_flags); } EXPORT_SYMBOL_GPL(io_uring_cmd_buffer_select); /* * Return true if this multishot uring_cmd needs to be completed, otherwise * the event CQE is posted successfully. * * This function must use `struct io_br_sel` returned from * io_uring_cmd_buffer_select() for committing the buffer in the same * uring_cmd submission context. */ bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, struct io_br_sel *sel, unsigned int issue_flags) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); unsigned int cflags = 0; if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT)) return true; if (sel->val > 0) { cflags = io_put_kbuf(req, sel->val, sel->buf_list); if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) return false; } io_kbuf_recycle(req, sel->buf_list, issue_flags); if (sel->val < 0) req_set_fail(req); io_req_set_res(req, sel->val, cflags); return true; } EXPORT_SYMBOL_GPL(io_uring_mshot_cmd_post_cqe);
4287 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 // SPDX-License-Identifier: GPL-2.0-only /* * Dynamic DMA mapping support. * * This implementation is a fallback for platforms that do not support * I/O TLBs (aka DMA address translation hardware). * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> * Copyright (C) 2000, 2003 Hewlett-Packard Co * David Mosberger-Tang <davidm@hpl.hp.com> * * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid * unnecessary i-cache flushing. * 04/07/.. ak Better overflow handling. Assorted fixes. * 05/09/10 linville Add support for syncing ranges, support syncing for * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup. * 08/12/11 beckyb Add highmem support */ #define pr_fmt(fmt) "software IO TLB: " fmt #include <linux/cache.h> #include <linux/cc_platform.h> #include <linux/ctype.h> #include <linux/debugfs.h> #include <linux/dma-direct.h> #include <linux/dma-map-ops.h> #include <linux/export.h> #include <linux/gfp.h> #include <linux/highmem.h> #include <linux/io.h> #include <linux/iommu-helper.h> #include <linux/init.h> #include <linux/memblock.h> #include <linux/mm.h> #include <linux/pfn.h> #include <linux/rculist.h> #include <linux/scatterlist.h> #include <linux/set_memory.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/swiotlb.h> #include <linux/types.h> #ifdef CONFIG_DMA_RESTRICTED_POOL #include <linux/of.h> #include <linux/of_fdt.h> #include <linux/of_reserved_mem.h> #include <linux/slab.h> #endif #define CREATE_TRACE_POINTS #include <trace/events/swiotlb.h> #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) /* * Minimum IO TLB size to bother booting with. Systems with mainly * 64bit capable cards will only lightly use the swiotlb. If we can't * allocate a contiguous 1MB, we're probably in trouble anyway. */ #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) #define INVALID_PHYS_ADDR (~(phys_addr_t)0) /** * struct io_tlb_slot - IO TLB slot descriptor * @orig_addr: The original address corresponding to a mapped entry. * @alloc_size: Size of the allocated buffer. * @list: The free list describing the number of free entries available * from each index. * @pad_slots: Number of preceding padding slots. Valid only in the first * allocated non-padding slot. */ struct io_tlb_slot { phys_addr_t orig_addr; size_t alloc_size; unsigned short list; unsigned short pad_slots; }; static bool swiotlb_force_bounce; static bool swiotlb_force_disable; #ifdef CONFIG_SWIOTLB_DYNAMIC static void swiotlb_dyn_alloc(struct work_struct *work); static struct io_tlb_mem io_tlb_default_mem = { .lock = __SPIN_LOCK_UNLOCKED(io_tlb_default_mem.lock), .pools = LIST_HEAD_INIT(io_tlb_default_mem.pools), .dyn_alloc = __WORK_INITIALIZER(io_tlb_default_mem.dyn_alloc, swiotlb_dyn_alloc), }; #else /* !CONFIG_SWIOTLB_DYNAMIC */ static struct io_tlb_mem io_tlb_default_mem; #endif /* CONFIG_SWIOTLB_DYNAMIC */ static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT; static unsigned long default_nareas; /** * struct io_tlb_area - IO TLB memory area descriptor * * This is a single area with a single lock. * * @used: The number of used IO TLB block. * @index: The slot index to start searching in this area for next round. * @lock: The lock to protect the above data structures in the map and * unmap calls. */ struct io_tlb_area { unsigned long used; unsigned int index; spinlock_t lock; }; /* * Round up number of slabs to the next power of 2. The last area is going * be smaller than the rest if default_nslabs is not power of two. * The number of slot in an area should be a multiple of IO_TLB_SEGSIZE, * otherwise a segment may span two or more areas. It conflicts with free * contiguous slots tracking: free slots are treated contiguous no matter * whether they cross an area boundary. * * Return true if default_nslabs is rounded up. */ static bool round_up_default_nslabs(void) { if (!default_nareas) return false; if (default_nslabs < IO_TLB_SEGSIZE * default_nareas) default_nslabs = IO_TLB_SEGSIZE * default_nareas; else if (is_power_of_2(default_nslabs)) return false; default_nslabs = roundup_pow_of_two(default_nslabs); return true; } /** * swiotlb_adjust_nareas() - adjust the number of areas and slots * @nareas: Desired number of areas. Zero is treated as 1. * * Adjust the default number of areas in a memory pool. * The default size of the memory pool may also change to meet minimum area * size requirements. */ static void swiotlb_adjust_nareas(unsigned int nareas) { if (!nareas) nareas = 1; else if (!is_power_of_2(nareas)) nareas = roundup_pow_of_two(nareas); default_nareas = nareas; pr_info("area num %d.\n", nareas); if (round_up_default_nslabs()) pr_info("SWIOTLB bounce buffer size roundup to %luMB", (default_nslabs << IO_TLB_SHIFT) >> 20); } /** * limit_nareas() - get the maximum number of areas for a given memory pool size * @nareas: Desired number of areas. * @nslots: Total number of slots in the memory pool. * * Limit the number of areas to the maximum possible number of areas in * a memory pool of the given size. * * Return: Maximum possible number of areas. */ static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots) { if (nslots < nareas * IO_TLB_SEGSIZE) return nslots / IO_TLB_SEGSIZE; return nareas; } static int __init setup_io_tlb_npages(char *str) { if (isdigit(*str)) { /* avoid tail segment of size < IO_TLB_SEGSIZE */ default_nslabs = ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE); } if (*str == ',') ++str; if (isdigit(*str)) swiotlb_adjust_nareas(simple_strtoul(str, &str, 0)); if (*str == ',') ++str; if (!strcmp(str, "force")) swiotlb_force_bounce = true; else if (!strcmp(str, "noforce")) swiotlb_force_disable = true; return 0; } early_param("swiotlb", setup_io_tlb_npages); unsigned long swiotlb_size_or_default(void) { return default_nslabs << IO_TLB_SHIFT; } void __init swiotlb_adjust_size(unsigned long size) { /* * If swiotlb parameter has not been specified, give a chance to * architectures such as those supporting memory encryption to * adjust/expand SWIOTLB size for their use. */ if (default_nslabs != IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT) return; size = ALIGN(size, IO_TLB_SIZE); default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); if (round_up_default_nslabs()) size = default_nslabs << IO_TLB_SHIFT; pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20); } void swiotlb_print_info(void) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; if (!mem->nslabs) { pr_warn("No low mem\n"); return; } pr_info("mapped [mem %pa-%pa] (%luMB)\n", &mem->start, &mem->end, (mem->nslabs << IO_TLB_SHIFT) >> 20); } static inline unsigned long io_tlb_offset(unsigned long val) { return val & (IO_TLB_SEGSIZE - 1); } static inline unsigned long nr_slots(u64 val) { return DIV_ROUND_UP(val, IO_TLB_SIZE); } /* * Early SWIOTLB allocation may be too early to allow an architecture to * perform the desired operations. This function allows the architecture to * call SWIOTLB when the operations are possible. It needs to be called * before the SWIOTLB memory is used. */ void __init swiotlb_update_mem_attributes(void) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long bytes; if (!mem->nslabs || mem->late_alloc) return; bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT); set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT); } static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, unsigned long nslabs, bool late_alloc, unsigned int nareas) { void *vaddr = phys_to_virt(start); unsigned long bytes = nslabs << IO_TLB_SHIFT, i; mem->nslabs = nslabs; mem->start = start; mem->end = mem->start + bytes; mem->late_alloc = late_alloc; mem->nareas = nareas; mem->area_nslabs = nslabs / mem->nareas; for (i = 0; i < mem->nareas; i++) { spin_lock_init(&mem->areas[i].lock); mem->areas[i].index = 0; mem->areas[i].used = 0; } for (i = 0; i < mem->nslabs; i++) { mem->slots[i].list = min(IO_TLB_SEGSIZE - io_tlb_offset(i), mem->nslabs - i); mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; mem->slots[i].pad_slots = 0; } memset(vaddr, 0, bytes); mem->vaddr = vaddr; return; } /** * add_mem_pool() - add a memory pool to the allocator * @mem: Software IO TLB allocator. * @pool: Memory pool to be added. */ static void add_mem_pool(struct io_tlb_mem *mem, struct io_tlb_pool *pool) { #ifdef CONFIG_SWIOTLB_DYNAMIC spin_lock(&mem->lock); list_add_rcu(&pool->node, &mem->pools); mem->nslabs += pool->nslabs; spin_unlock(&mem->lock); #else mem->nslabs = pool->nslabs; #endif } static void __init *swiotlb_memblock_alloc(unsigned long nslabs, unsigned int flags, int (*remap)(void *tlb, unsigned long nslabs)) { size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT); void *tlb; /* * By default allocate the bounce buffer memory from low memory, but * allow to pick a location everywhere for hypervisors with guest * memory encryption. */ if (flags & SWIOTLB_ANY) tlb = memblock_alloc(bytes, PAGE_SIZE); else tlb = memblock_alloc_low(bytes, PAGE_SIZE); if (!tlb) { pr_warn("%s: Failed to allocate %zu bytes tlb structure\n", __func__, bytes); return NULL; } if (remap && remap(tlb, nslabs) < 0) { memblock_free(tlb, PAGE_ALIGN(bytes)); pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes); return NULL; } return tlb; } /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. */ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, int (*remap)(void *tlb, unsigned long nslabs)) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long nslabs; unsigned int nareas; size_t alloc_size; void *tlb; if (!addressing_limit && !swiotlb_force_bounce) return; if (swiotlb_force_disable) return; io_tlb_default_mem.force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE); #ifdef CONFIG_SWIOTLB_DYNAMIC if (!remap) io_tlb_default_mem.can_grow = true; if (flags & SWIOTLB_ANY) io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); else io_tlb_default_mem.phys_limit = ARCH_LOW_ADDRESS_LIMIT; #endif if (!default_nareas) swiotlb_adjust_nareas(num_possible_cpus()); nslabs = default_nslabs; nareas = limit_nareas(default_nareas, nslabs); while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) { if (nslabs <= IO_TLB_MIN_SLABS) return; nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); nareas = limit_nareas(nareas, nslabs); } if (default_nslabs != nslabs) { pr_info("SWIOTLB bounce buffer size adjusted %lu -> %lu slabs", default_nslabs, nslabs); default_nslabs = nslabs; } alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs)); mem->slots = memblock_alloc(alloc_size, PAGE_SIZE); if (!mem->slots) { pr_warn("%s: Failed to allocate %zu bytes align=0x%lx\n", __func__, alloc_size, PAGE_SIZE); return; } mem->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area), nareas), SMP_CACHE_BYTES); if (!mem->areas) { pr_warn("%s: Failed to allocate mem->areas.\n", __func__); return; } swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, nareas); add_mem_pool(&io_tlb_default_mem, mem); if (flags & SWIOTLB_VERBOSE) swiotlb_print_info(); } void __init swiotlb_init(bool addressing_limit, unsigned int flags) { swiotlb_init_remap(addressing_limit, flags, NULL); } /* * Systems with larger DMA zones (those that don't support ISA) can * initialize the swiotlb later using the slab allocator if needed. * This should be just like above, but with some error catching. */ int swiotlb_init_late(size_t size, gfp_t gfp_mask, int (*remap)(void *tlb, unsigned long nslabs)) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); unsigned int nareas; unsigned char *vstart = NULL; unsigned int order, area_order; bool retried = false; int rc = 0; if (io_tlb_default_mem.nslabs) return 0; if (swiotlb_force_disable) return 0; io_tlb_default_mem.force_bounce = swiotlb_force_bounce; #ifdef CONFIG_SWIOTLB_DYNAMIC if (!remap) io_tlb_default_mem.can_grow = true; if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp_mask & __GFP_DMA)) io_tlb_default_mem.phys_limit = zone_dma_limit; else if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp_mask & __GFP_DMA32)) io_tlb_default_mem.phys_limit = max(DMA_BIT_MASK(32), zone_dma_limit); else io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); #endif if (!default_nareas) swiotlb_adjust_nareas(num_possible_cpus()); retry: order = get_order(nslabs << IO_TLB_SHIFT); nslabs = SLABS_PER_PAGE << order; while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { vstart = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, order); if (vstart) break; order--; nslabs = SLABS_PER_PAGE << order; retried = true; } if (!vstart) return -ENOMEM; if (remap) rc = remap(vstart, nslabs); if (rc) { free_pages((unsigned long)vstart, order); nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); if (nslabs < IO_TLB_MIN_SLABS) return rc; retried = true; goto retry; } if (retried) { pr_warn("only able to allocate %ld MB\n", (PAGE_SIZE << order) >> 20); } nareas = limit_nareas(default_nareas, nslabs); area_order = get_order(array_size(sizeof(*mem->areas), nareas)); mem->areas = (struct io_tlb_area *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order); if (!mem->areas) goto error_area; mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(array_size(sizeof(*mem->slots), nslabs))); if (!mem->slots) goto error_slots; set_memory_decrypted((unsigned long)vstart, (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT); swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true, nareas); add_mem_pool(&io_tlb_default_mem, mem); swiotlb_print_info(); return 0; error_slots: free_pages((unsigned long)mem->areas, area_order); error_area: free_pages((unsigned long)vstart, order); return -ENOMEM; } void __init swiotlb_exit(void) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long tbl_vaddr; size_t tbl_size, slots_size; unsigned int area_order; if (swiotlb_force_bounce) return; if (!mem->nslabs) return; pr_info("tearing down default memory pool\n"); tbl_vaddr = (unsigned long)phys_to_virt(mem->start); tbl_size = PAGE_ALIGN(mem->end - mem->start); slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs)); set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT); if (mem->late_alloc) { area_order = get_order(array_size(sizeof(*mem->areas), mem->nareas)); free_pages((unsigned long)mem->areas, area_order); free_pages(tbl_vaddr, get_order(tbl_size)); free_pages((unsigned long)mem->slots, get_order(slots_size)); } else { memblock_free_late(__pa(mem->areas), array_size(sizeof(*mem->areas), mem->nareas)); memblock_free_late(mem->start, tbl_size); memblock_free_late(__pa(mem->slots), slots_size); } memset(mem, 0, sizeof(*mem)); } #ifdef CONFIG_SWIOTLB_DYNAMIC /** * alloc_dma_pages() - allocate pages to be used for DMA * @gfp: GFP flags for the allocation. * @bytes: Size of the buffer. * @phys_limit: Maximum allowed physical address of the buffer. * * Allocate pages from the buddy allocator. If successful, make the allocated * pages decrypted that they can be used for DMA. * * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN) * if the allocated physical address was above @phys_limit. */ static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit) { unsigned int order = get_order(bytes); struct page *page; phys_addr_t paddr; void *vaddr; page = alloc_pages(gfp, order); if (!page) return NULL; paddr = page_to_phys(page); if (paddr + bytes - 1 > phys_limit) { __free_pages(page, order); return ERR_PTR(-EAGAIN); } vaddr = phys_to_virt(paddr); if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) goto error; return page; error: /* Intentional leak if pages cannot be encrypted again. */ if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) __free_pages(page, order); return NULL; } /** * swiotlb_alloc_tlb() - allocate a dynamic IO TLB buffer * @dev: Device for which a memory pool is allocated. * @bytes: Size of the buffer. * @phys_limit: Maximum allowed physical address of the buffer. * @gfp: GFP flags for the allocation. * * Return: Allocated pages, or %NULL on allocation failure. */ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes, u64 phys_limit, gfp_t gfp) { struct page *page; /* * Allocate from the atomic pools if memory is encrypted and * the allocation is atomic, because decrypting may block. */ if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) { void *vaddr; if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) return NULL; return dma_alloc_from_pool(dev, bytes, &vaddr, gfp, dma_coherent_ok); } gfp &= ~GFP_ZONEMASK; if (phys_limit <= zone_dma_limit) gfp |= __GFP_DMA; else if (phys_limit <= DMA_BIT_MASK(32)) gfp |= __GFP_DMA32; while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) { if (IS_ENABLED(CONFIG_ZONE_DMA32) && phys_limit < DMA_BIT_MASK(64) && !(gfp & (__GFP_DMA32 | __GFP_DMA))) gfp |= __GFP_DMA32; else if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & __GFP_DMA)) gfp = (gfp & ~__GFP_DMA32) | __GFP_DMA; else return NULL; } return page; } /** * swiotlb_free_tlb() - free a dynamically allocated IO TLB buffer * @vaddr: Virtual address of the buffer. * @bytes: Size of the buffer. */ static void swiotlb_free_tlb(void *vaddr, size_t bytes) { if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && dma_free_from_pool(NULL, vaddr, bytes)) return; /* Intentional leak if pages cannot be encrypted again. */ if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) __free_pages(virt_to_page(vaddr), get_order(bytes)); } /** * swiotlb_alloc_pool() - allocate a new IO TLB memory pool * @dev: Device for which a memory pool is allocated. * @minslabs: Minimum number of slabs. * @nslabs: Desired (maximum) number of slabs. * @nareas: Number of areas. * @phys_limit: Maximum DMA buffer physical address. * @gfp: GFP flags for the allocations. * * Allocate and initialize a new IO TLB memory pool. The actual number of * slabs may be reduced if allocation of @nslabs fails. If even * @minslabs cannot be allocated, this function fails. * * Return: New memory pool, or %NULL on allocation failure. */ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev, unsigned long minslabs, unsigned long nslabs, unsigned int nareas, u64 phys_limit, gfp_t gfp) { struct io_tlb_pool *pool; unsigned int slot_order; struct page *tlb; size_t pool_size; size_t tlb_size; if (nslabs > SLABS_PER_PAGE << MAX_PAGE_ORDER) { nslabs = SLABS_PER_PAGE << MAX_PAGE_ORDER; nareas = limit_nareas(nareas, nslabs); } pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), nareas); pool = kzalloc(pool_size, gfp); if (!pool) goto error; pool->areas = (void *)pool + sizeof(*pool); tlb_size = nslabs << IO_TLB_SHIFT; while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp))) { if (nslabs <= minslabs) goto error_tlb; nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); nareas = limit_nareas(nareas, nslabs); tlb_size = nslabs << IO_TLB_SHIFT; } slot_order = get_order(array_size(sizeof(*pool->slots), nslabs)); pool->slots = (struct io_tlb_slot *) __get_free_pages(gfp, slot_order); if (!pool->slots) goto error_slots; swiotlb_init_io_tlb_pool(pool, page_to_phys(tlb), nslabs, true, nareas); return pool; error_slots: swiotlb_free_tlb(page_address(tlb), tlb_size); error_tlb: kfree(pool); error: return NULL; } /** * swiotlb_dyn_alloc() - dynamic memory pool allocation worker * @work: Pointer to dyn_alloc in struct io_tlb_mem. */ static void swiotlb_dyn_alloc(struct work_struct *work) { struct io_tlb_mem *mem = container_of(work, struct io_tlb_mem, dyn_alloc); struct io_tlb_pool *pool; pool = swiotlb_alloc_pool(NULL, IO_TLB_MIN_SLABS, default_nslabs, default_nareas, mem->phys_limit, GFP_KERNEL); if (!pool) { pr_warn_ratelimited("Failed to allocate new pool"); return; } add_mem_pool(mem, pool); } /** * swiotlb_dyn_free() - RCU callback to free a memory pool * @rcu: RCU head in the corresponding struct io_tlb_pool. */ static void swiotlb_dyn_free(struct rcu_head *rcu) { struct io_tlb_pool *pool = container_of(rcu, struct io_tlb_pool, rcu); size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs); size_t tlb_size = pool->end - pool->start; free_pages((unsigned long)pool->slots, get_order(slots_size)); swiotlb_free_tlb(pool->vaddr, tlb_size); kfree(pool); } /** * __swiotlb_find_pool() - find the IO TLB pool for a physical address * @dev: Device which has mapped the DMA buffer. * @paddr: Physical address within the DMA buffer. * * Find the IO TLB memory pool descriptor which contains the given physical * address, if any. This function is for use only when the dev is known to * be using swiotlb. Use swiotlb_find_pool() for the more general case * when this condition is not met. * * Return: Memory pool which contains @paddr, or %NULL if none. */ struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; rcu_read_lock(); list_for_each_entry_rcu(pool, &mem->pools, node) { if (paddr >= pool->start && paddr < pool->end) goto out; } list_for_each_entry_rcu(pool, &dev->dma_io_tlb_pools, node) { if (paddr >= pool->start && paddr < pool->end) goto out; } pool = NULL; out: rcu_read_unlock(); return pool; } /** * swiotlb_del_pool() - remove an IO TLB pool from a device * @dev: Owning device. * @pool: Memory pool to be removed. */ static void swiotlb_del_pool(struct device *dev, struct io_tlb_pool *pool) { unsigned long flags; spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); list_del_rcu(&pool->node); spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); call_rcu(&pool->rcu, swiotlb_dyn_free); } #endif /* CONFIG_SWIOTLB_DYNAMIC */ /** * swiotlb_dev_init() - initialize swiotlb fields in &struct device * @dev: Device to be initialized. */ void swiotlb_dev_init(struct device *dev) { dev->dma_io_tlb_mem = &io_tlb_default_mem; #ifdef CONFIG_SWIOTLB_DYNAMIC INIT_LIST_HEAD(&dev->dma_io_tlb_pools); spin_lock_init(&dev->dma_io_tlb_lock); dev->dma_uses_io_tlb = false; #endif } /** * swiotlb_align_offset() - Get required offset into an IO TLB allocation. * @dev: Owning device. * @align_mask: Allocation alignment mask. * @addr: DMA address. * * Return the minimum offset from the start of an IO TLB allocation which is * required for a given buffer address and allocation alignment to keep the * device happy. * * First, the address bits covered by min_align_mask must be identical in the * original address and the bounce buffer address. High bits are preserved by * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra * padding bytes before the bounce buffer. * * Second, @align_mask specifies which bits of the first allocated slot must * be zero. This may require allocating additional padding slots, and then the * offset (in bytes) from the first such padding slot is returned. */ static unsigned int swiotlb_align_offset(struct device *dev, unsigned int align_mask, u64 addr) { return addr & dma_get_min_align_mask(dev) & (align_mask | (IO_TLB_SIZE - 1)); } /* * Bounce: copy the swiotlb buffer from or back to the original dma location */ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir, struct io_tlb_pool *mem) { int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = mem->slots[index].orig_addr; size_t alloc_size = mem->slots[index].alloc_size; unsigned long pfn = PFN_DOWN(orig_addr); unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start; int tlb_offset; if (orig_addr == INVALID_PHYS_ADDR) return; /* * It's valid for tlb_offset to be negative. This can happen when the * "offset" returned by swiotlb_align_offset() is non-zero, and the * tlb_addr is pointing within the first "offset" bytes of the second * or subsequent slots of the allocated swiotlb area. While it's not * valid for tlb_addr to be pointing within the first "offset" bytes * of the first slot, there's no way to check for such an error since * this function can't distinguish the first slot from the second and * subsequent slots. */ tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) - swiotlb_align_offset(dev, 0, orig_addr); orig_addr += tlb_offset; alloc_size -= tlb_offset; if (size > alloc_size) { dev_WARN_ONCE(dev, 1, "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n", alloc_size, size); size = alloc_size; } if (PageHighMem(pfn_to_page(pfn))) { unsigned int offset = orig_addr & ~PAGE_MASK; struct page *page; unsigned int sz = 0; unsigned long flags; while (size) { sz = min_t(size_t, PAGE_SIZE - offset, size); local_irq_save(flags); page = pfn_to_page(pfn); if (dir == DMA_TO_DEVICE) memcpy_from_page(vaddr, page, offset, sz); else memcpy_to_page(page, offset, vaddr, sz); local_irq_restore(flags); size -= sz; pfn++; vaddr += sz; offset = 0; } } else if (dir == DMA_TO_DEVICE) { memcpy(vaddr, phys_to_virt(orig_addr), size); } else { memcpy(phys_to_virt(orig_addr), vaddr, size); } } static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx) { return start + (idx << IO_TLB_SHIFT); } /* * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. */ static inline unsigned long get_max_slots(unsigned long boundary_mask) { return (boundary_mask >> IO_TLB_SHIFT) + 1; } static unsigned int wrap_area_index(struct io_tlb_pool *mem, unsigned int index) { if (index >= mem->area_nslabs) return 0; return index; } /* * Track the total used slots with a global atomic value in order to have * correct information to determine the high water mark. The mem_used() * function gives imprecise results because there's no locking across * multiple areas. */ #ifdef CONFIG_DEBUG_FS static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots) { unsigned long old_hiwater, new_used; new_used = atomic_long_add_return(nslots, &mem->total_used); old_hiwater = atomic_long_read(&mem->used_hiwater); do { if (new_used <= old_hiwater) break; } while (!atomic_long_try_cmpxchg(&mem->used_hiwater, &old_hiwater, new_used)); } static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) { atomic_long_sub(nslots, &mem->total_used); } #else /* !CONFIG_DEBUG_FS */ static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots) { } static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) { } #endif /* CONFIG_DEBUG_FS */ #ifdef CONFIG_SWIOTLB_DYNAMIC #ifdef CONFIG_DEBUG_FS static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots) { atomic_long_add(nslots, &mem->transient_nslabs); } static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots) { atomic_long_sub(nslots, &mem->transient_nslabs); } #else /* !CONFIG_DEBUG_FS */ static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots) { } static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots) { } #endif /* CONFIG_DEBUG_FS */ #endif /* CONFIG_SWIOTLB_DYNAMIC */ /** * swiotlb_search_pool_area() - search one memory area in one pool * @dev: Device which maps the buffer. * @pool: Memory pool to be searched. * @area_index: Index of the IO TLB memory area to be searched. * @orig_addr: Original (non-bounced) IO buffer address. * @alloc_size: Total requested size of the bounce buffer, * including initial alignment padding. * @alloc_align_mask: Required alignment of the allocated buffer. * * Find a suitable sequence of IO TLB entries for the request and allocate * a buffer from the given IO TLB memory area. * This function takes care of locking. * * Return: Index of the first allocated slot, or -1 on error. */ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool, int area_index, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask) { struct io_tlb_area *area = pool->areas + area_index; unsigned long boundary_mask = dma_get_seg_boundary(dev); dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(dev, pool->start) & boundary_mask; unsigned long max_slots = get_max_slots(boundary_mask); unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); unsigned int nslots = nr_slots(alloc_size), stride; unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr); unsigned int index, slots_checked, count = 0, i; unsigned long flags; unsigned int slot_base; unsigned int slot_index; BUG_ON(!nslots); BUG_ON(area_index >= pool->nareas); /* * Historically, swiotlb allocations >= PAGE_SIZE were guaranteed to be * page-aligned in the absence of any other alignment requirements. * 'alloc_align_mask' was later introduced to specify the alignment * explicitly, however this is passed as zero for streaming mappings * and so we preserve the old behaviour there in case any drivers are * relying on it. */ if (!alloc_align_mask && !iotlb_align_mask && alloc_size >= PAGE_SIZE) alloc_align_mask = PAGE_SIZE - 1; /* * Ensure that the allocation is at least slot-aligned and update * 'iotlb_align_mask' to ignore bits that will be preserved when * offsetting into the allocation. */ alloc_align_mask |= (IO_TLB_SIZE - 1); iotlb_align_mask &= ~alloc_align_mask; /* * For mappings with an alignment requirement don't bother looping to * unaligned slots once we found an aligned one. */ stride = get_max_slots(max(alloc_align_mask, iotlb_align_mask)); spin_lock_irqsave(&area->lock, flags); if (unlikely(nslots > pool->area_nslabs - area->used)) goto not_found; slot_base = area_index * pool->area_nslabs; index = area->index; for (slots_checked = 0; slots_checked < pool->area_nslabs; ) { phys_addr_t tlb_addr; slot_index = slot_base + index; tlb_addr = slot_addr(tbl_dma_addr, slot_index); if ((tlb_addr & alloc_align_mask) || (orig_addr && (tlb_addr & iotlb_align_mask) != (orig_addr & iotlb_align_mask))) { index = wrap_area_index(pool, index + 1); slots_checked++; continue; } if (!iommu_is_span_boundary(slot_index, nslots, nr_slots(tbl_dma_addr), max_slots)) { if (pool->slots[slot_index].list >= nslots) goto found; } index = wrap_area_index(pool, index + stride); slots_checked += stride; } not_found: spin_unlock_irqrestore(&area->lock, flags); return -1; found: /* * If we find a slot that indicates we have 'nslots' number of * contiguous buffers, we allocate the buffers from that slot onwards * and set the list of free entries to '0' indicating unavailable. */ for (i = slot_index; i < slot_index + nslots; i++) { pool->slots[i].list = 0; pool->slots[i].alloc_size = alloc_size - (offset + ((i - slot_index) << IO_TLB_SHIFT)); } for (i = slot_index - 1; io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && pool->slots[i].list; i--) pool->slots[i].list = ++count; /* * Update the indices to avoid searching in the next round. */ area->index = wrap_area_index(pool, index + nslots); area->used += nslots; spin_unlock_irqrestore(&area->lock, flags); inc_used_and_hiwater(dev->dma_io_tlb_mem, nslots); return slot_index; } #ifdef CONFIG_SWIOTLB_DYNAMIC /** * swiotlb_search_area() - search one memory area in all pools * @dev: Device which maps the buffer. * @start_cpu: Start CPU number. * @cpu_offset: Offset from @start_cpu. * @orig_addr: Original (non-bounced) IO buffer address. * @alloc_size: Total requested size of the bounce buffer, * including initial alignment padding. * @alloc_align_mask: Required alignment of the allocated buffer. * @retpool: Used memory pool, updated on return. * * Search one memory area in all pools for a sequence of slots that match the * allocation constraints. * * Return: Index of the first allocated slot, or -1 on error. */ static int swiotlb_search_area(struct device *dev, int start_cpu, int cpu_offset, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask, struct io_tlb_pool **retpool) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; int area_index; int index = -1; rcu_read_lock(); list_for_each_entry_rcu(pool, &mem->pools, node) { if (cpu_offset >= pool->nareas) continue; area_index = (start_cpu + cpu_offset) & (pool->nareas - 1); index = swiotlb_search_pool_area(dev, pool, area_index, orig_addr, alloc_size, alloc_align_mask); if (index >= 0) { *retpool = pool; break; } } rcu_read_unlock(); return index; } /** * swiotlb_find_slots() - search for slots in the whole swiotlb * @dev: Device which maps the buffer. * @orig_addr: Original (non-bounced) IO buffer address. * @alloc_size: Total requested size of the bounce buffer, * including initial alignment padding. * @alloc_align_mask: Required alignment of the allocated buffer. * @retpool: Used memory pool, updated on return. * * Search through the whole software IO TLB to find a sequence of slots that * match the allocation constraints. * * Return: Index of the first allocated slot, or -1 on error. */ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask, struct io_tlb_pool **retpool) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; unsigned long nslabs; unsigned long flags; u64 phys_limit; int cpu, i; int index; if (alloc_size > IO_TLB_SEGSIZE * IO_TLB_SIZE) return -1; cpu = raw_smp_processor_id(); for (i = 0; i < default_nareas; ++i) { index = swiotlb_search_area(dev, cpu, i, orig_addr, alloc_size, alloc_align_mask, &pool); if (index >= 0) goto found; } if (!mem->can_grow) return -1; schedule_work(&mem->dyn_alloc); nslabs = nr_slots(alloc_size); phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit); pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit, GFP_NOWAIT); if (!pool) return -1; index = swiotlb_search_pool_area(dev, pool, 0, orig_addr, alloc_size, alloc_align_mask); if (index < 0) { swiotlb_dyn_free(&pool->rcu); return -1; } pool->transient = true; spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); list_add_rcu(&pool->node, &dev->dma_io_tlb_pools); spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); inc_transient_used(mem, pool->nslabs); found: WRITE_ONCE(dev->dma_uses_io_tlb, true); /* * The general barrier orders reads and writes against a presumed store * of the SWIOTLB buffer address by a device driver (to a driver private * data structure). It serves two purposes. * * First, the store to dev->dma_uses_io_tlb must be ordered before the * presumed store. This guarantees that the returned buffer address * cannot be passed to another CPU before updating dev->dma_uses_io_tlb. * * Second, the load from mem->pools must be ordered before the same * presumed store. This guarantees that the returned buffer address * cannot be observed by another CPU before an update of the RCU list * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy * atomicity). * * See also the comment in swiotlb_find_pool(). */ smp_mb(); *retpool = pool; return index; } #else /* !CONFIG_SWIOTLB_DYNAMIC */ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask, struct io_tlb_pool **retpool) { struct io_tlb_pool *pool; int start, i; int index; *retpool = pool = &dev->dma_io_tlb_mem->defpool; i = start = raw_smp_processor_id() & (pool->nareas - 1); do { index = swiotlb_search_pool_area(dev, pool, i, orig_addr, alloc_size, alloc_align_mask); if (index >= 0) return index; if (++i >= pool->nareas) i = 0; } while (i != start); return -1; } #endif /* CONFIG_SWIOTLB_DYNAMIC */ #ifdef CONFIG_DEBUG_FS /** * mem_used() - get number of used slots in an allocator * @mem: Software IO TLB allocator. * * The result is accurate in this version of the function, because an atomic * counter is available if CONFIG_DEBUG_FS is set. * * Return: Number of used slots. */ static unsigned long mem_used(struct io_tlb_mem *mem) { return atomic_long_read(&mem->total_used); } #else /* !CONFIG_DEBUG_FS */ /** * mem_pool_used() - get number of used slots in a memory pool * @pool: Software IO TLB memory pool. * * The result is not accurate, see mem_used(). * * Return: Approximate number of used slots. */ static unsigned long mem_pool_used(struct io_tlb_pool *pool) { int i; unsigned long used = 0; for (i = 0; i < pool->nareas; i++) used += pool->areas[i].used; return used; } /** * mem_used() - get number of used slots in an allocator * @mem: Software IO TLB allocator. * * The result is not accurate, because there is no locking of individual * areas. * * Return: Approximate number of used slots. */ static unsigned long mem_used(struct io_tlb_mem *mem) { #ifdef CONFIG_SWIOTLB_DYNAMIC struct io_tlb_pool *pool; unsigned long used = 0; rcu_read_lock(); list_for_each_entry_rcu(pool, &mem->pools, node) used += mem_pool_used(pool); rcu_read_unlock(); return used; #else return mem_pool_used(&mem->defpool); #endif } #endif /* CONFIG_DEBUG_FS */ /** * swiotlb_tbl_map_single() - bounce buffer map a single contiguous physical area * @dev: Device which maps the buffer. * @orig_addr: Original (non-bounced) physical IO buffer address * @mapping_size: Requested size of the actual bounce buffer, excluding * any pre- or post-padding for alignment * @alloc_align_mask: Required start and end alignment of the allocated buffer * @dir: DMA direction * @attrs: Optional DMA attributes for the map operation * * Find and allocate a suitable sequence of IO TLB slots for the request. * The allocated space starts at an alignment specified by alloc_align_mask, * and the size of the allocated space is rounded up so that the total amount * of allocated space is a multiple of (alloc_align_mask + 1). If * alloc_align_mask is zero, the allocated space may be at any alignment and * the size is not rounded up. * * The returned address is within the allocated space and matches the bits * of orig_addr that are specified in the DMA min_align_mask for the device. As * such, this returned address may be offset from the beginning of the allocated * space. The bounce buffer space starting at the returned address for * mapping_size bytes is initialized to the contents of the original IO buffer * area. Any pre-padding (due to an offset) and any post-padding (due to * rounding-up the size) is not initialized. */ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, size_t mapping_size, unsigned int alloc_align_mask, enum dma_data_direction dir, unsigned long attrs) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; unsigned int offset; struct io_tlb_pool *pool; unsigned int i; size_t size; int index; phys_addr_t tlb_addr; unsigned short pad_slots; if (!mem || !mem->nslabs) { dev_warn_ratelimited(dev, "Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); return (phys_addr_t)DMA_MAPPING_ERROR; } if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); /* * The default swiotlb memory pool is allocated with PAGE_SIZE * alignment. If a mapping is requested with larger alignment, * the mapping may be unable to use the initial slot(s) in all * sets of IO_TLB_SEGSIZE slots. In such case, a mapping request * of or near the maximum mapping size would always fail. */ dev_WARN_ONCE(dev, alloc_align_mask > ~PAGE_MASK, "Alloc alignment may prevent fulfilling requests with max mapping_size\n"); offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr); size = ALIGN(mapping_size + offset, alloc_align_mask + 1); index = swiotlb_find_slots(dev, orig_addr, size, alloc_align_mask, &pool); if (index == -1) { if (!(attrs & DMA_ATTR_NO_WARN)) dev_warn_ratelimited(dev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", size, mem->nslabs, mem_used(mem)); return (phys_addr_t)DMA_MAPPING_ERROR; } /* * If dma_skip_sync was set, reset it on first SWIOTLB buffer * mapping to always sync SWIOTLB buffers. */ dma_reset_need_sync(dev); /* * Save away the mapping from the original address to the DMA address. * This is needed when we sync the memory. Then we sync the buffer if * needed. */ pad_slots = offset >> IO_TLB_SHIFT; offset &= (IO_TLB_SIZE - 1); index += pad_slots; pool->slots[index].pad_slots = pad_slots; for (i = 0; i < (nr_slots(size) - pad_slots); i++) pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(pool->start, index) + offset; /* * When the device is writing memory, i.e. dir == DMA_FROM_DEVICE, copy * the original buffer to the TLB buffer before initiating DMA in order * to preserve the original's data if the device does a partial write, * i.e. if the device doesn't overwrite the entire buffer. Preserving * the original data, even if it's garbage, is necessary to match * hardware behavior. Use of swiotlb is supposed to be transparent, * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes. */ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE, pool); return tlb_addr; } static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr, struct io_tlb_pool *mem) { unsigned long flags; unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); int index, nslots, aindex; struct io_tlb_area *area; int count, i; index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; index -= mem->slots[index].pad_slots; nslots = nr_slots(mem->slots[index].alloc_size + offset); aindex = index / mem->area_nslabs; area = &mem->areas[aindex]; /* * Return the buffer to the free list by setting the corresponding * entries to indicate the number of contiguous entries available. * While returning the entries to the free list, we merge the entries * with slots below and above the pool being returned. */ BUG_ON(aindex >= mem->nareas); spin_lock_irqsave(&area->lock, flags); if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) count = mem->slots[index + nslots].list; else count = 0; /* * Step 1: return the slots to the free list, merging the slots with * superceeding slots */ for (i = index + nslots - 1; i >= index; i--) { mem->slots[i].list = ++count; mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; mem->slots[i].pad_slots = 0; } /* * Step 2: merge the returned slots with the preceding slots, if * available (non zero) */ for (i = index - 1; io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list; i--) mem->slots[i].list = ++count; area->used -= nslots; spin_unlock_irqrestore(&area->lock, flags); dec_used(dev->dma_io_tlb_mem, nslots); } #ifdef CONFIG_SWIOTLB_DYNAMIC /** * swiotlb_del_transient() - delete a transient memory pool * @dev: Device which mapped the buffer. * @tlb_addr: Physical address within a bounce buffer. * @pool: Pointer to the transient memory pool to be checked and deleted. * * Check whether the address belongs to a transient SWIOTLB memory pool. * If yes, then delete the pool. * * Return: %true if @tlb_addr belonged to a transient pool that was released. */ static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr, struct io_tlb_pool *pool) { if (!pool->transient) return false; dec_used(dev->dma_io_tlb_mem, pool->nslabs); swiotlb_del_pool(dev, pool); dec_transient_used(dev->dma_io_tlb_mem, pool->nslabs); return true; } #else /* !CONFIG_SWIOTLB_DYNAMIC */ static inline bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr, struct io_tlb_pool *pool) { return false; } #endif /* CONFIG_SWIOTLB_DYNAMIC */ /* * tlb_addr is the physical address of the bounce buffer to unmap. */ void __swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, size_t mapping_size, enum dma_data_direction dir, unsigned long attrs, struct io_tlb_pool *pool) { /* * First, sync the memory before unmapping the entry */ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE, pool); if (swiotlb_del_transient(dev, tlb_addr, pool)) return; swiotlb_release_slots(dev, tlb_addr, pool); } void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir, struct io_tlb_pool *pool) { if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE, pool); else BUG_ON(dir != DMA_FROM_DEVICE); } void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir, struct io_tlb_pool *pool) { if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE, pool); else BUG_ON(dir != DMA_TO_DEVICE); } /* * Create a swiotlb mapping for the buffer at @paddr, and in case of DMAing * to the device copy the data into it as well. */ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir, unsigned long attrs) { phys_addr_t swiotlb_addr; dma_addr_t dma_addr; trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size); swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, 0, dir, attrs); if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; /* Ensure that the address returned is DMA'ble */ dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { __swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC, swiotlb_find_pool(dev, swiotlb_addr)); dev_WARN_ONCE(dev, 1, "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); return DMA_MAPPING_ERROR; } if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) arch_sync_dma_for_device(swiotlb_addr, size, dir); return dma_addr; } size_t swiotlb_max_mapping_size(struct device *dev) { int min_align_mask = dma_get_min_align_mask(dev); int min_align = 0; /* * swiotlb_find_slots() skips slots according to * min align mask. This affects max mapping size. * Take it into acount here. */ if (min_align_mask) min_align = roundup(min_align_mask, IO_TLB_SIZE); return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align; } /** * is_swiotlb_allocated() - check if the default software IO TLB is initialized */ bool is_swiotlb_allocated(void) { return io_tlb_default_mem.nslabs; } bool is_swiotlb_active(struct device *dev) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; return mem && mem->nslabs; } /** * default_swiotlb_base() - get the base address of the default SWIOTLB * * Get the lowest physical address used by the default software IO TLB pool. */ phys_addr_t default_swiotlb_base(void) { #ifdef CONFIG_SWIOTLB_DYNAMIC io_tlb_default_mem.can_grow = false; #endif return io_tlb_default_mem.defpool.start; } /** * default_swiotlb_limit() - get the address limit of the default SWIOTLB * * Get the highest physical address used by the default software IO TLB pool. */ phys_addr_t default_swiotlb_limit(void) { #ifdef CONFIG_SWIOTLB_DYNAMIC return io_tlb_default_mem.phys_limit; #else return io_tlb_default_mem.defpool.end - 1; #endif } #ifdef CONFIG_DEBUG_FS #ifdef CONFIG_SWIOTLB_DYNAMIC static unsigned long mem_transient_used(struct io_tlb_mem *mem) { return atomic_long_read(&mem->transient_nslabs); } static int io_tlb_transient_used_get(void *data, u64 *val) { struct io_tlb_mem *mem = data; *val = mem_transient_used(mem); return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_transient_used, io_tlb_transient_used_get, NULL, "%llu\n"); #endif /* CONFIG_SWIOTLB_DYNAMIC */ static int io_tlb_used_get(void *data, u64 *val) { struct io_tlb_mem *mem = data; *val = mem_used(mem); return 0; } static int io_tlb_hiwater_get(void *data, u64 *val) { struct io_tlb_mem *mem = data; *val = atomic_long_read(&mem->used_hiwater); return 0; } static int io_tlb_hiwater_set(void *data, u64 val) { struct io_tlb_mem *mem = data; /* Only allow setting to zero */ if (val != 0) return -EINVAL; atomic_long_set(&mem->used_hiwater, val); return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get, io_tlb_hiwater_set, "%llu\n"); static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, const char *dirname) { mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs); if (!mem->nslabs) return; debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs); debugfs_create_file("io_tlb_used", 0400, mem->debugfs, mem, &fops_io_tlb_used); debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem, &fops_io_tlb_hiwater); #ifdef CONFIG_SWIOTLB_DYNAMIC debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs, mem, &fops_io_tlb_transient_used); #endif } static int __init swiotlb_create_default_debugfs(void) { swiotlb_create_debugfs_files(&io_tlb_default_mem, "swiotlb"); return 0; } late_initcall(swiotlb_create_default_debugfs); #else /* !CONFIG_DEBUG_FS */ static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, const char *dirname) { } #endif /* CONFIG_DEBUG_FS */ #ifdef CONFIG_DMA_RESTRICTED_POOL struct page *swiotlb_alloc(struct device *dev, size_t size) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; phys_addr_t tlb_addr; unsigned int align; int index; if (!mem) return NULL; align = (1 << (get_order(size) + PAGE_SHIFT)) - 1; index = swiotlb_find_slots(dev, 0, size, align, &pool); if (index == -1) return NULL; tlb_addr = slot_addr(pool->start, index); if (unlikely(!PAGE_ALIGNED(tlb_addr))) { dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n", &tlb_addr); swiotlb_release_slots(dev, tlb_addr, pool); return NULL; } return pfn_to_page(PFN_DOWN(tlb_addr)); } bool swiotlb_free(struct device *dev, struct page *page, size_t size) { phys_addr_t tlb_addr = page_to_phys(page); struct io_tlb_pool *pool; pool = swiotlb_find_pool(dev, tlb_addr); if (!pool) return false; swiotlb_release_slots(dev, tlb_addr, pool); return true; } static int rmem_swiotlb_device_init(struct reserved_mem *rmem, struct device *dev) { struct io_tlb_mem *mem = rmem->priv; unsigned long nslabs = rmem->size >> IO_TLB_SHIFT; /* Set Per-device io tlb area to one */ unsigned int nareas = 1; if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) { dev_err(dev, "Restricted DMA pool must be accessible within the linear mapping."); return -EINVAL; } /* * Since multiple devices can share the same pool, the private data, * io_tlb_mem struct, will be initialized by the first device attached * to it. */ if (!mem) { struct io_tlb_pool *pool; mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) return -ENOMEM; pool = &mem->defpool; pool->slots = kcalloc(nslabs, sizeof(*pool->slots), GFP_KERNEL); if (!pool->slots) { kfree(mem); return -ENOMEM; } pool->areas = kcalloc(nareas, sizeof(*pool->areas), GFP_KERNEL); if (!pool->areas) { kfree(pool->slots); kfree(mem); return -ENOMEM; } set_memory_decrypted((unsigned long)phys_to_virt(rmem->base), rmem->size >> PAGE_SHIFT); swiotlb_init_io_tlb_pool(pool, rmem->base, nslabs, false, nareas); mem->force_bounce = true; mem->for_alloc = true; #ifdef CONFIG_SWIOTLB_DYNAMIC spin_lock_init(&mem->lock); INIT_LIST_HEAD_RCU(&mem->pools); #endif add_mem_pool(mem, pool); rmem->priv = mem; swiotlb_create_debugfs_files(mem, rmem->name); } dev->dma_io_tlb_mem = mem; return 0; } static void rmem_swiotlb_device_release(struct reserved_mem *rmem, struct device *dev) { dev->dma_io_tlb_mem = &io_tlb_default_mem; } static const struct reserved_mem_ops rmem_swiotlb_ops = { .device_init = rmem_swiotlb_device_init, .device_release = rmem_swiotlb_device_release, }; static int __init rmem_swiotlb_setup(struct reserved_mem *rmem) { unsigned long node = rmem->fdt_node; if (of_get_flat_dt_prop(node, "reusable", NULL) || of_get_flat_dt_prop(node, "linux,cma-default", NULL) || of_get_flat_dt_prop(node, "linux,dma-default", NULL) || of_get_flat_dt_prop(node, "no-map", NULL)) return -EINVAL; rmem->ops = &rmem_swiotlb_ops; pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n", &rmem->base, (unsigned long)rmem->size / SZ_1M); return 0; } RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup); #endif /* CONFIG_DMA_RESTRICTED_POOL */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 /* SPDX-License-Identifier: GPL-2.0-only */ /* * AppArmor security module * * This file contains AppArmor basic permission sets definitions. * * Copyright 2017 Canonical Ltd. */ #ifndef __AA_PERM_H #define __AA_PERM_H #include <linux/fs.h> #include "label.h" #define AA_MAY_EXEC MAY_EXEC #define AA_MAY_WRITE MAY_WRITE #define AA_MAY_READ MAY_READ #define AA_MAY_APPEND MAY_APPEND #define AA_MAY_CREATE 0x0010 #define AA_MAY_DELETE 0x0020 #define AA_MAY_OPEN 0x0040 #define AA_MAY_RENAME 0x0080 /* pair */ #define AA_MAY_SETATTR 0x0100 /* meta write */ #define AA_MAY_GETATTR 0x0200 /* meta read */ #define AA_MAY_SETCRED 0x0400 /* security cred/attr */ #define AA_MAY_GETCRED 0x0800 #define AA_MAY_CHMOD 0x1000 /* pair */ #define AA_MAY_CHOWN 0x2000 /* pair */ #define AA_MAY_CHGRP 0x4000 /* pair */ #define AA_MAY_LOCK 0x8000 /* LINK_SUBSET overlaid */ #define AA_EXEC_MMAP 0x00010000 #define AA_MAY_MPROT 0x00020000 /* extend conditions */ #define AA_MAY_LINK 0x00040000 /* pair */ #define AA_MAY_SNAPSHOT 0x00080000 /* pair */ #define AA_MAY_DELEGATE #define AA_CONT_MATCH 0x08000000 #define AA_MAY_STACK 0x10000000 #define AA_MAY_ONEXEC 0x20000000 /* either stack or change_profile */ #define AA_MAY_CHANGE_PROFILE 0x40000000 #define AA_MAY_CHANGEHAT 0x80000000 #define AA_LINK_SUBSET AA_MAY_LOCK /* overlaid */ #define AA_MAY_CREATE_SQPOLL AA_MAY_CREATE #define AA_MAY_OVERRIDE_CRED AA_MAY_APPEND #define AA_URING_PERM_MASK (AA_MAY_OVERRIDE_CRED | AA_MAY_CREATE_SQPOLL) #define PERMS_CHRS_MASK (MAY_READ | MAY_WRITE | AA_MAY_CREATE | \ AA_MAY_DELETE | AA_MAY_LINK | AA_MAY_LOCK | \ AA_MAY_EXEC | AA_EXEC_MMAP | AA_MAY_APPEND) #define PERMS_NAMES_MASK (PERMS_CHRS_MASK | AA_MAY_OPEN | AA_MAY_RENAME | \ AA_MAY_SETATTR | AA_MAY_GETATTR | AA_MAY_SETCRED | \ AA_MAY_GETCRED | AA_MAY_CHMOD | AA_MAY_CHOWN | \ AA_MAY_CHGRP | AA_MAY_MPROT | AA_MAY_SNAPSHOT | \ AA_MAY_STACK | AA_MAY_ONEXEC | \ AA_MAY_CHANGE_PROFILE | AA_MAY_CHANGEHAT) extern const char aa_file_perm_chrs[]; extern const char *aa_file_perm_names[]; struct aa_perms { u32 allow; u32 deny; /* explicit deny, or conflict if allow also set */ u32 subtree; /* allow perm on full subtree only when allow is set */ u32 cond; /* set only when ~allow and ~deny */ u32 kill; /* set only when ~allow | deny */ u32 complain; /* accumulates only used when ~allow & ~deny */ u32 prompt; /* accumulates only used when ~allow & ~deny */ u32 audit; /* set only when allow is set */ u32 quiet; /* set only when ~allow | deny */ u32 hide; /* set only when ~allow | deny */ u32 xindex; u32 tag; /* tag string index, if present */ u32 label; /* label string index, if present */ }; /* * Indexes are broken into a 24 bit index and 8 bit flag. * For the index to be valid there must be a value in the flag */ #define AA_INDEX_MASK 0x00ffffff #define AA_INDEX_FLAG_MASK 0xff000000 #define AA_INDEX_NONE 0 #define ALL_PERMS_MASK 0xffffffff extern struct aa_perms nullperms; extern struct aa_perms allperms; /** * aa_perms_accum_raw - accumulate perms with out masking off overlapping perms * @accum: perms struct to accumulate into * @addend: perms struct to add to @accum */ static inline void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend) { accum->deny |= addend->deny; accum->allow &= addend->allow & ~addend->deny; accum->audit |= addend->audit & addend->allow; accum->quiet &= addend->quiet & ~addend->allow; accum->kill |= addend->kill & ~addend->allow; accum->complain |= addend->complain & ~addend->allow & ~addend->deny; accum->cond |= addend->cond & ~addend->allow & ~addend->deny; accum->hide &= addend->hide & ~addend->allow; accum->prompt |= addend->prompt & ~addend->allow & ~addend->deny; accum->subtree |= addend->subtree & ~addend->deny; if (!accum->xindex) accum->xindex = addend->xindex; if (!accum->tag) accum->tag = addend->tag; if (!accum->label) accum->label = addend->label; } /** * aa_perms_accum - accumulate perms, masking off overlapping perms * @accum: perms struct to accumulate into * @addend: perms struct to add to @accum */ static inline void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend) { accum->deny |= addend->deny; accum->allow &= addend->allow & ~accum->deny; accum->audit |= addend->audit & accum->allow; accum->quiet &= addend->quiet & ~accum->allow; accum->kill |= addend->kill & ~accum->allow; accum->complain |= addend->complain & ~accum->allow & ~accum->deny; accum->cond |= addend->cond & ~accum->allow & ~accum->deny; accum->hide &= addend->hide & ~accum->allow; accum->prompt |= addend->prompt & ~accum->allow & ~accum->deny; accum->subtree &= addend->subtree & ~accum->deny; if (!accum->xindex) accum->xindex = addend->xindex; if (!accum->tag) accum->tag = addend->tag; if (!accum->label) accum->label = addend->label; } #define xcheck(FN1, FN2) \ ({ \ int e, error = FN1; \ e = FN2; \ if (e) \ error = e; \ error; \ }) /* * TODO: update for labels pointing to labels instead of profiles * TODO: optimize the walk, currently does subwalk of L2 for each P in L1 * gah this doesn't allow for label compound check!!!! */ #define xcheck_ns_profile_profile(P1, P2, FN, args...) \ ({ \ int ____e = 0; \ if (P1->ns == P2->ns) \ ____e = FN((P1), (P2), args); \ (____e); \ }) #define xcheck_ns_profile_label(P, L, FN, args...) \ ({ \ struct aa_profile *__p2; \ fn_for_each((L), __p2, \ xcheck_ns_profile_profile((P), __p2, (FN), args)); \ }) #define xcheck_ns_labels(L1, L2, FN, args...) \ ({ \ struct aa_profile *__p1; \ fn_for_each((L1), __p1, FN(__p1, (L2), args)); \ }) /* Do the cross check but applying FN at the profiles level */ #define xcheck_labels_profiles(L1, L2, FN, args...) \ xcheck_ns_labels((L1), (L2), xcheck_ns_profile_label, (FN), args) #define xcheck_labels(L1, L2, P, FN1, FN2) \ xcheck(fn_for_each((L1), (P), (FN1)), fn_for_each((L2), (P), (FN2))) extern struct aa_perms default_perms; void aa_perm_mask_to_str(char *str, size_t str_size, const char *chrs, u32 mask); void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names, u32 mask); void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs, u32 chrsmask, const char * const *names, u32 namesmask); void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms); void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend); void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend); void aa_profile_match_label(struct aa_profile *profile, struct aa_ruleset *rules, struct aa_label *label, int type, u32 request, struct aa_perms *perms); int aa_check_perms(struct aa_profile *profile, struct aa_perms *perms, u32 request, struct apparmor_audit_data *ad, void (*cb)(struct audit_buffer *, void *)); #endif /* __AA_PERM_H */
653 653 653 2017 2010 652 654 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 // SPDX-License-Identifier: GPL-2.0-or-later /* * linux/drivers/net/netconsole.c * * Copyright (C) 2001 Ingo Molnar <mingo@redhat.com> * * This file contains the implementation of an IRQ-safe, crash-safe * kernel console implementation that outputs kernel messages to the * network. * * Modification history: * * 2001-09-17 started by Ingo Molnar. * 2003-08-11 2.6 port by Matt Mackall * simplified options * generic card hooks * works non-modular * 2003-09-07 rewritten with netpoll api */ /**************************************************************** * ****************************************************************/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/mm.h> #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/console.h> #include <linux/moduleparam.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/netpoll.h> #include <linux/inet.h> #include <linux/configfs.h> #include <linux/etherdevice.h> #include <linux/u64_stats_sync.h> #include <linux/utsname.h> #include <linux/rtnetlink.h> MODULE_AUTHOR("Matt Mackall <mpm@selenic.com>"); MODULE_DESCRIPTION("Console driver for network interfaces"); MODULE_LICENSE("GPL"); #define MAX_PARAM_LENGTH 256 #define MAX_EXTRADATA_ENTRY_LEN 256 #define MAX_EXTRADATA_VALUE_LEN 200 /* The number 3 comes from userdata entry format characters (' ', '=', '\n') */ #define MAX_EXTRADATA_NAME_LEN (MAX_EXTRADATA_ENTRY_LEN - \ MAX_EXTRADATA_VALUE_LEN - 3) #define MAX_EXTRADATA_ITEMS 16 #define MAX_PRINT_CHUNK 1000 static char config[MAX_PARAM_LENGTH]; module_param_string(netconsole, config, MAX_PARAM_LENGTH, 0); MODULE_PARM_DESC(netconsole, " netconsole=[src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]"); static bool oops_only; module_param(oops_only, bool, 0600); MODULE_PARM_DESC(oops_only, "Only log oops messages"); #define NETCONSOLE_PARAM_TARGET_PREFIX "cmdline" #ifndef MODULE static int __init option_setup(char *opt) { strscpy(config, opt, MAX_PARAM_LENGTH); return 1; } __setup("netconsole=", option_setup); #endif /* MODULE */ /* Linked list of all configured targets */ static LIST_HEAD(target_list); /* target_cleanup_list is used to track targets that need to be cleaned outside * of target_list_lock. It should be cleaned in the same function it is * populated. */ static LIST_HEAD(target_cleanup_list); /* This needs to be a spinlock because write_msg() cannot sleep */ static DEFINE_SPINLOCK(target_list_lock); /* This needs to be a mutex because netpoll_cleanup might sleep */ static DEFINE_MUTEX(target_cleanup_list_lock); /* * Console driver for netconsoles. Register only consoles that have * an associated target of the same type. */ static struct console netconsole_ext, netconsole; struct netconsole_target_stats { u64_stats_t xmit_drop_count; u64_stats_t enomem_count; struct u64_stats_sync syncp; }; enum console_type { CONS_BASIC = BIT(0), CONS_EXTENDED = BIT(1), }; /* Features enabled in sysdata. Contrary to userdata, this data is populated by * the kernel. The fields are designed as bitwise flags, allowing multiple * features to be set in sysdata_fields. */ enum sysdata_feature { /* Populate the CPU that sends the message */ SYSDATA_CPU_NR = BIT(0), /* Populate the task name (as in current->comm) in sysdata */ SYSDATA_TASKNAME = BIT(1), /* Kernel release/version as part of sysdata */ SYSDATA_RELEASE = BIT(2), /* Include a per-target message ID as part of sysdata */ SYSDATA_MSGID = BIT(3), }; /** * struct netconsole_target - Represents a configured netconsole target. * @list: Links this target into the target_list. * @group: Links us into the configfs subsystem hierarchy. * @userdata_group: Links to the userdata configfs hierarchy * @extradata_complete: Cached, formatted string of append * @userdata_length: String length of usedata in extradata_complete. * @sysdata_fields: Sysdata features enabled. * @msgcounter: Message sent counter. * @stats: Packet send stats for the target. Used for debugging. * @enabled: On / off knob to enable / disable target. * Visible from userspace (read-write). * We maintain a strict 1:1 correspondence between this and * whether the corresponding netpoll is active or inactive. * Also, other parameters of a target may be modified at * runtime only when it is disabled (enabled == 0). * @extended: Denotes whether console is extended or not. * @release: Denotes whether kernel release version should be prepended * to the message. Depends on extended console. * @np: The netpoll structure for this target. * Contains the other userspace visible parameters: * dev_name (read-write) * local_port (read-write) * remote_port (read-write) * local_ip (read-write) * remote_ip (read-write) * local_mac (read-only) * remote_mac (read-write) * @buf: The buffer used to send the full msg to the network stack */ struct netconsole_target { struct list_head list; #ifdef CONFIG_NETCONSOLE_DYNAMIC struct config_group group; struct config_group userdata_group; char extradata_complete[MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS]; size_t userdata_length; /* bit-wise with sysdata_feature bits */ u32 sysdata_fields; /* protected by target_list_lock */ u32 msgcounter; #endif struct netconsole_target_stats stats; bool enabled; bool extended; bool release; struct netpoll np; /* protected by target_list_lock */ char buf[MAX_PRINT_CHUNK]; }; #ifdef CONFIG_NETCONSOLE_DYNAMIC static struct configfs_subsystem netconsole_subsys; static DEFINE_MUTEX(dynamic_netconsole_mutex); static int __init dynamic_netconsole_init(void) { config_group_init(&netconsole_subsys.su_group); mutex_init(&netconsole_subsys.su_mutex); return configfs_register_subsystem(&netconsole_subsys); } static void __exit dynamic_netconsole_exit(void) { configfs_unregister_subsystem(&netconsole_subsys); } /* * Targets that were created by parsing the boot/module option string * do not exist in the configfs hierarchy (and have NULL names) and will * never go away, so make these a no-op for them. */ static void netconsole_target_get(struct netconsole_target *nt) { if (config_item_name(&nt->group.cg_item)) config_group_get(&nt->group); } static void netconsole_target_put(struct netconsole_target *nt) { if (config_item_name(&nt->group.cg_item)) config_group_put(&nt->group); } #else /* !CONFIG_NETCONSOLE_DYNAMIC */ static int __init dynamic_netconsole_init(void) { return 0; } static void __exit dynamic_netconsole_exit(void) { } /* * No danger of targets going away from under us when dynamic * reconfigurability is off. */ static void netconsole_target_get(struct netconsole_target *nt) { } static void netconsole_target_put(struct netconsole_target *nt) { } static void populate_configfs_item(struct netconsole_target *nt, int cmdline_count) { } #endif /* CONFIG_NETCONSOLE_DYNAMIC */ /* Allocate and initialize with defaults. * Note that these targets get their config_item fields zeroed-out. */ static struct netconsole_target *alloc_and_init(void) { struct netconsole_target *nt; nt = kzalloc(sizeof(*nt), GFP_KERNEL); if (!nt) return nt; if (IS_ENABLED(CONFIG_NETCONSOLE_EXTENDED_LOG)) nt->extended = true; if (IS_ENABLED(CONFIG_NETCONSOLE_PREPEND_RELEASE)) nt->release = true; nt->np.name = "netconsole"; strscpy(nt->np.dev_name, "eth0", IFNAMSIZ); nt->np.local_port = 6665; nt->np.remote_port = 6666; eth_broadcast_addr(nt->np.remote_mac); return nt; } /* Clean up every target in the cleanup_list and move the clean targets back to * the main target_list. */ static void netconsole_process_cleanups_core(void) { struct netconsole_target *nt, *tmp; unsigned long flags; /* The cleanup needs RTNL locked */ ASSERT_RTNL(); mutex_lock(&target_cleanup_list_lock); list_for_each_entry_safe(nt, tmp, &target_cleanup_list, list) { /* all entries in the cleanup_list needs to be disabled */ WARN_ON_ONCE(nt->enabled); do_netpoll_cleanup(&nt->np); /* moved the cleaned target to target_list. Need to hold both * locks */ spin_lock_irqsave(&target_list_lock, flags); list_move(&nt->list, &target_list); spin_unlock_irqrestore(&target_list_lock, flags); } WARN_ON_ONCE(!list_empty(&target_cleanup_list)); mutex_unlock(&target_cleanup_list_lock); } static void netconsole_print_banner(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); if (np->ipv6) np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6); else np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip); np_info(np, "interface name '%s'\n", np->dev_name); np_info(np, "local ethernet address '%pM'\n", np->dev_mac); np_info(np, "remote port %d\n", np->remote_port); if (np->ipv6) np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6); else np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip); np_info(np, "remote ethernet address %pM\n", np->remote_mac); } /* Parse the string and populate the `inet_addr` union. Return 0 if IPv4 is * populated, 1 if IPv6 is populated, and -1 upon failure. */ static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) { const char *end = NULL; int len; len = strlen(str); if (!len) return -1; if (str[len - 1] == '\n') len -= 1; if (in4_pton(str, len, (void *)addr, -1, &end) > 0 && (!end || *end == 0 || *end == '\n')) return 0; if (IS_ENABLED(CONFIG_IPV6) && in6_pton(str, len, (void *)addr, -1, &end) > 0 && (!end || *end == 0 || *end == '\n')) return 1; return -1; } #ifdef CONFIG_NETCONSOLE_DYNAMIC /* * Our subsystem hierarchy is: * * /sys/kernel/config/netconsole/ * | * <target>/ * | enabled * | release * | dev_name * | local_port * | remote_port * | local_ip * | remote_ip * | local_mac * | remote_mac * | transmit_errors * | userdata/ * | <key>/ * | value * | ... * | * <target>/... */ static struct netconsole_target *to_target(struct config_item *item) { struct config_group *cfg_group; cfg_group = to_config_group(item); if (!cfg_group) return NULL; return container_of(to_config_group(item), struct netconsole_target, group); } /* Do the list cleanup with the rtnl lock hold. rtnl lock is necessary because * netdev might be cleaned-up by calling __netpoll_cleanup(), */ static void netconsole_process_cleanups(void) { /* rtnl lock is called here, because it has precedence over * target_cleanup_list_lock mutex and target_cleanup_list */ rtnl_lock(); netconsole_process_cleanups_core(); rtnl_unlock(); } /* Get rid of possible trailing newline, returning the new length */ static void trim_newline(char *s, size_t maxlen) { size_t len; len = strnlen(s, maxlen); if (s[len - 1] == '\n') s[len - 1] = '\0'; } /* * Attribute operations for netconsole_target. */ static ssize_t enabled_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->enabled); } static ssize_t extended_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->extended); } static ssize_t release_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->release); } static ssize_t dev_name_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%s\n", to_target(item)->np.dev_name); } static ssize_t local_port_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->np.local_port); } static ssize_t remote_port_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->np.remote_port); } static ssize_t local_ip_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item); if (nt->np.ipv6) return sysfs_emit(buf, "%pI6c\n", &nt->np.local_ip.in6); else return sysfs_emit(buf, "%pI4\n", &nt->np.local_ip); } static ssize_t remote_ip_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item); if (nt->np.ipv6) return sysfs_emit(buf, "%pI6c\n", &nt->np.remote_ip.in6); else return sysfs_emit(buf, "%pI4\n", &nt->np.remote_ip); } static ssize_t local_mac_show(struct config_item *item, char *buf) { struct net_device *dev = to_target(item)->np.dev; static const u8 bcast[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; return sysfs_emit(buf, "%pM\n", dev ? dev->dev_addr : bcast); } static ssize_t remote_mac_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%pM\n", to_target(item)->np.remote_mac); } static ssize_t transmit_errors_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item); u64 xmit_drop_count, enomem_count; unsigned int start; do { start = u64_stats_fetch_begin(&nt->stats.syncp); xmit_drop_count = u64_stats_read(&nt->stats.xmit_drop_count); enomem_count = u64_stats_read(&nt->stats.enomem_count); } while (u64_stats_fetch_retry(&nt->stats.syncp, start)); return sysfs_emit(buf, "%llu\n", xmit_drop_count + enomem_count); } /* configfs helper to display if cpu_nr sysdata feature is enabled */ static ssize_t sysdata_cpu_nr_enabled_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item->ci_parent); bool cpu_nr_enabled; mutex_lock(&dynamic_netconsole_mutex); cpu_nr_enabled = !!(nt->sysdata_fields & SYSDATA_CPU_NR); mutex_unlock(&dynamic_netconsole_mutex); return sysfs_emit(buf, "%d\n", cpu_nr_enabled); } /* configfs helper to display if taskname sysdata feature is enabled */ static ssize_t sysdata_taskname_enabled_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item->ci_parent); bool taskname_enabled; mutex_lock(&dynamic_netconsole_mutex); taskname_enabled = !!(nt->sysdata_fields & SYSDATA_TASKNAME); mutex_unlock(&dynamic_netconsole_mutex); return sysfs_emit(buf, "%d\n", taskname_enabled); } static ssize_t sysdata_release_enabled_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item->ci_parent); bool release_enabled; mutex_lock(&dynamic_netconsole_mutex); release_enabled = !!(nt->sysdata_fields & SYSDATA_TASKNAME); mutex_unlock(&dynamic_netconsole_mutex); return sysfs_emit(buf, "%d\n", release_enabled); } /* Iterate in the list of target, and make sure we don't have any console * register without targets of the same type */ static void unregister_netcons_consoles(void) { struct netconsole_target *nt; u32 console_type_needed = 0; unsigned long flags; spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) { if (nt->extended) console_type_needed |= CONS_EXTENDED; else console_type_needed |= CONS_BASIC; } spin_unlock_irqrestore(&target_list_lock, flags); if (!(console_type_needed & CONS_EXTENDED) && console_is_registered(&netconsole_ext)) unregister_console(&netconsole_ext); if (!(console_type_needed & CONS_BASIC) && console_is_registered(&netconsole)) unregister_console(&netconsole); } static ssize_t sysdata_msgid_enabled_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item->ci_parent); bool msgid_enabled; mutex_lock(&dynamic_netconsole_mutex); msgid_enabled = !!(nt->sysdata_fields & SYSDATA_MSGID); mutex_unlock(&dynamic_netconsole_mutex); return sysfs_emit(buf, "%d\n", msgid_enabled); } /* * This one is special -- targets created through the configfs interface * are not enabled (and the corresponding netpoll activated) by default. * The user is expected to set the desired parameters first (which * would enable him to dynamically add new netpoll targets for new * network interfaces as and when they come up). */ static ssize_t enabled_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); unsigned long flags; bool enabled; ssize_t ret; mutex_lock(&dynamic_netconsole_mutex); ret = kstrtobool(buf, &enabled); if (ret) goto out_unlock; ret = -EINVAL; if (enabled == nt->enabled) { pr_info("network logging has already %s\n", nt->enabled ? "started" : "stopped"); goto out_unlock; } if (enabled) { /* true */ if (nt->release && !nt->extended) { pr_err("Not enabling netconsole. Release feature requires extended log message"); goto out_unlock; } if (nt->extended && !console_is_registered(&netconsole_ext)) { netconsole_ext.flags |= CON_ENABLED; register_console(&netconsole_ext); } /* User might be enabling the basic format target for the very * first time, make sure the console is registered. */ if (!nt->extended && !console_is_registered(&netconsole)) { netconsole.flags |= CON_ENABLED; register_console(&netconsole); } /* * Skip netconsole_parser_cmdline() -- all the attributes are * already configured via configfs. Just print them out. */ netconsole_print_banner(&nt->np); ret = netpoll_setup(&nt->np); if (ret) goto out_unlock; nt->enabled = true; pr_info("network logging started\n"); } else { /* false */ /* We need to disable the netconsole before cleaning it up * otherwise we might end up in write_msg() with * nt->np.dev == NULL and nt->enabled == true */ mutex_lock(&target_cleanup_list_lock); spin_lock_irqsave(&target_list_lock, flags); nt->enabled = false; /* Remove the target from the list, while holding * target_list_lock */ list_move(&nt->list, &target_cleanup_list); spin_unlock_irqrestore(&target_list_lock, flags); mutex_unlock(&target_cleanup_list_lock); /* Unregister consoles, whose the last target of that type got * disabled. */ unregister_netcons_consoles(); } ret = strnlen(buf, count); /* Deferred cleanup */ netconsole_process_cleanups(); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } static ssize_t release_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); bool release; ssize_t ret; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); ret = -EINVAL; goto out_unlock; } ret = kstrtobool(buf, &release); if (ret) goto out_unlock; nt->release = release; ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } static ssize_t extended_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); bool extended; ssize_t ret; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); ret = -EINVAL; goto out_unlock; } ret = kstrtobool(buf, &extended); if (ret) goto out_unlock; nt->extended = extended; ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } static ssize_t dev_name_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); mutex_unlock(&dynamic_netconsole_mutex); return -EINVAL; } strscpy(nt->np.dev_name, buf, IFNAMSIZ); trim_newline(nt->np.dev_name, IFNAMSIZ); mutex_unlock(&dynamic_netconsole_mutex); return strnlen(buf, count); } static ssize_t local_port_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); ssize_t ret = -EINVAL; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } ret = kstrtou16(buf, 10, &nt->np.local_port); if (ret < 0) goto out_unlock; ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } static ssize_t remote_port_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); ssize_t ret = -EINVAL; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } ret = kstrtou16(buf, 10, &nt->np.remote_port); if (ret < 0) goto out_unlock; ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } static ssize_t local_ip_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); ssize_t ret = -EINVAL; int ipv6; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } ipv6 = netpoll_parse_ip_addr(buf, &nt->np.local_ip); if (ipv6 == -1) goto out_unlock; nt->np.ipv6 = !!ipv6; ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } static ssize_t remote_ip_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); ssize_t ret = -EINVAL; int ipv6; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } ipv6 = netpoll_parse_ip_addr(buf, &nt->np.remote_ip); if (ipv6 == -1) goto out_unlock; nt->np.ipv6 = !!ipv6; ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } /* Count number of entries we have in extradata. * This is important because the extradata_complete only supports * MAX_EXTRADATA_ITEMS entries. Before enabling any new {user,sys}data * feature, number of entries needs to checked for available space. */ static size_t count_extradata_entries(struct netconsole_target *nt) { size_t entries; /* Userdata entries */ entries = list_count_nodes(&nt->userdata_group.cg_children); /* Plus sysdata entries */ if (nt->sysdata_fields & SYSDATA_CPU_NR) entries += 1; if (nt->sysdata_fields & SYSDATA_TASKNAME) entries += 1; if (nt->sysdata_fields & SYSDATA_RELEASE) entries += 1; if (nt->sysdata_fields & SYSDATA_MSGID) entries += 1; return entries; } static ssize_t remote_mac_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); u8 remote_mac[ETH_ALEN]; ssize_t ret = -EINVAL; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } if (!mac_pton(buf, remote_mac)) goto out_unlock; if (buf[MAC_ADDR_STR_LEN] && buf[MAC_ADDR_STR_LEN] != '\n') goto out_unlock; memcpy(nt->np.remote_mac, remote_mac, ETH_ALEN); ret = strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } struct userdatum { struct config_item item; char value[MAX_EXTRADATA_VALUE_LEN]; }; static struct userdatum *to_userdatum(struct config_item *item) { return container_of(item, struct userdatum, item); } struct userdata { struct config_group group; }; static struct userdata *to_userdata(struct config_item *item) { return container_of(to_config_group(item), struct userdata, group); } static struct netconsole_target *userdata_to_target(struct userdata *ud) { struct config_group *netconsole_group; netconsole_group = to_config_group(ud->group.cg_item.ci_parent); return to_target(&netconsole_group->cg_item); } static ssize_t userdatum_value_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%s\n", &(to_userdatum(item)->value[0])); } static void update_userdata(struct netconsole_target *nt) { struct list_head *entry; int child_count = 0; unsigned long flags; spin_lock_irqsave(&target_list_lock, flags); /* Clear the current string in case the last userdatum was deleted */ nt->userdata_length = 0; nt->extradata_complete[0] = 0; list_for_each(entry, &nt->userdata_group.cg_children) { struct userdatum *udm_item; struct config_item *item; if (child_count >= MAX_EXTRADATA_ITEMS) { spin_unlock_irqrestore(&target_list_lock, flags); WARN_ON_ONCE(1); return; } child_count++; item = container_of(entry, struct config_item, ci_entry); udm_item = to_userdatum(item); /* Skip userdata with no value set */ if (strnlen(udm_item->value, MAX_EXTRADATA_VALUE_LEN) == 0) continue; /* This doesn't overflow extradata_complete since it will write * one entry length (1/MAX_EXTRADATA_ITEMS long), entry count is * checked to not exceed MAX items with child_count above */ nt->userdata_length += scnprintf(&nt->extradata_complete[nt->userdata_length], MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n", item->ci_name, udm_item->value); } spin_unlock_irqrestore(&target_list_lock, flags); } static ssize_t userdatum_value_store(struct config_item *item, const char *buf, size_t count) { struct userdatum *udm = to_userdatum(item); struct netconsole_target *nt; struct userdata *ud; ssize_t ret; if (count > MAX_EXTRADATA_VALUE_LEN) return -EMSGSIZE; mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); ret = strscpy(udm->value, buf, sizeof(udm->value)); if (ret < 0) goto out_unlock; trim_newline(udm->value, sizeof(udm->value)); ud = to_userdata(item->ci_parent); nt = userdata_to_target(ud); update_userdata(nt); ret = count; out_unlock: mutex_unlock(&dynamic_netconsole_mutex); mutex_unlock(&netconsole_subsys.su_mutex); return ret; } /* disable_sysdata_feature - Disable sysdata feature and clean sysdata * @nt: target that is disabling the feature * @feature: feature being disabled */ static void disable_sysdata_feature(struct netconsole_target *nt, enum sysdata_feature feature) { nt->sysdata_fields &= ~feature; nt->extradata_complete[nt->userdata_length] = 0; } static ssize_t sysdata_msgid_enabled_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item->ci_parent); bool msgid_enabled, curr; ssize_t ret; ret = kstrtobool(buf, &msgid_enabled); if (ret) return ret; mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_MSGID); if (msgid_enabled == curr) goto unlock_ok; if (msgid_enabled && count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) { ret = -ENOSPC; goto unlock; } if (msgid_enabled) nt->sysdata_fields |= SYSDATA_MSGID; else disable_sysdata_feature(nt, SYSDATA_MSGID); unlock_ok: ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); mutex_unlock(&netconsole_subsys.su_mutex); return ret; } static ssize_t sysdata_release_enabled_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item->ci_parent); bool release_enabled, curr; ssize_t ret; ret = kstrtobool(buf, &release_enabled); if (ret) return ret; mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_RELEASE); if (release_enabled == curr) goto unlock_ok; if (release_enabled && count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) { ret = -ENOSPC; goto unlock; } if (release_enabled) nt->sysdata_fields |= SYSDATA_RELEASE; else disable_sysdata_feature(nt, SYSDATA_RELEASE); unlock_ok: ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); mutex_unlock(&netconsole_subsys.su_mutex); return ret; } static ssize_t sysdata_taskname_enabled_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item->ci_parent); bool taskname_enabled, curr; ssize_t ret; ret = kstrtobool(buf, &taskname_enabled); if (ret) return ret; mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_TASKNAME); if (taskname_enabled == curr) goto unlock_ok; if (taskname_enabled && count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) { ret = -ENOSPC; goto unlock; } if (taskname_enabled) nt->sysdata_fields |= SYSDATA_TASKNAME; else disable_sysdata_feature(nt, SYSDATA_TASKNAME); unlock_ok: ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); mutex_unlock(&netconsole_subsys.su_mutex); return ret; } /* configfs helper to sysdata cpu_nr feature */ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item->ci_parent); bool cpu_nr_enabled, curr; ssize_t ret; ret = kstrtobool(buf, &cpu_nr_enabled); if (ret) return ret; mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_CPU_NR); if (cpu_nr_enabled == curr) /* no change requested */ goto unlock_ok; if (cpu_nr_enabled && count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) { /* user wants the new feature, but there is no space in the * buffer. */ ret = -ENOSPC; goto unlock; } if (cpu_nr_enabled) nt->sysdata_fields |= SYSDATA_CPU_NR; else /* This is special because extradata_complete might have * remaining data from previous sysdata, and it needs to be * cleaned. */ disable_sysdata_feature(nt, SYSDATA_CPU_NR); unlock_ok: ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); mutex_unlock(&netconsole_subsys.su_mutex); return ret; } CONFIGFS_ATTR(userdatum_, value); CONFIGFS_ATTR(sysdata_, cpu_nr_enabled); CONFIGFS_ATTR(sysdata_, taskname_enabled); CONFIGFS_ATTR(sysdata_, release_enabled); CONFIGFS_ATTR(sysdata_, msgid_enabled); static struct configfs_attribute *userdatum_attrs[] = { &userdatum_attr_value, NULL, }; static void userdatum_release(struct config_item *item) { kfree(to_userdatum(item)); } static struct configfs_item_operations userdatum_ops = { .release = userdatum_release, }; static const struct config_item_type userdatum_type = { .ct_item_ops = &userdatum_ops, .ct_attrs = userdatum_attrs, .ct_owner = THIS_MODULE, }; static struct config_item *userdatum_make_item(struct config_group *group, const char *name) { struct netconsole_target *nt; struct userdatum *udm; struct userdata *ud; if (strlen(name) > MAX_EXTRADATA_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); ud = to_userdata(&group->cg_item); nt = userdata_to_target(ud); if (count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) return ERR_PTR(-ENOSPC); udm = kzalloc(sizeof(*udm), GFP_KERNEL); if (!udm) return ERR_PTR(-ENOMEM); config_item_init_type_name(&udm->item, name, &userdatum_type); return &udm->item; } static void userdatum_drop(struct config_group *group, struct config_item *item) { struct netconsole_target *nt; struct userdata *ud; ud = to_userdata(&group->cg_item); nt = userdata_to_target(ud); mutex_lock(&dynamic_netconsole_mutex); update_userdata(nt); config_item_put(item); mutex_unlock(&dynamic_netconsole_mutex); } static struct configfs_attribute *userdata_attrs[] = { &sysdata_attr_cpu_nr_enabled, &sysdata_attr_taskname_enabled, &sysdata_attr_release_enabled, &sysdata_attr_msgid_enabled, NULL, }; static struct configfs_group_operations userdata_ops = { .make_item = userdatum_make_item, .drop_item = userdatum_drop, }; static const struct config_item_type userdata_type = { .ct_item_ops = &userdatum_ops, .ct_group_ops = &userdata_ops, .ct_attrs = userdata_attrs, .ct_owner = THIS_MODULE, }; CONFIGFS_ATTR(, enabled); CONFIGFS_ATTR(, extended); CONFIGFS_ATTR(, dev_name); CONFIGFS_ATTR(, local_port); CONFIGFS_ATTR(, remote_port); CONFIGFS_ATTR(, local_ip); CONFIGFS_ATTR(, remote_ip); CONFIGFS_ATTR_RO(, local_mac); CONFIGFS_ATTR(, remote_mac); CONFIGFS_ATTR(, release); CONFIGFS_ATTR_RO(, transmit_errors); static struct configfs_attribute *netconsole_target_attrs[] = { &attr_enabled, &attr_extended, &attr_release, &attr_dev_name, &attr_local_port, &attr_remote_port, &attr_local_ip, &attr_remote_ip, &attr_local_mac, &attr_remote_mac, &attr_transmit_errors, NULL, }; /* * Item operations and type for netconsole_target. */ static void netconsole_target_release(struct config_item *item) { kfree(to_target(item)); } static struct configfs_item_operations netconsole_target_item_ops = { .release = netconsole_target_release, }; static const struct config_item_type netconsole_target_type = { .ct_attrs = netconsole_target_attrs, .ct_item_ops = &netconsole_target_item_ops, .ct_owner = THIS_MODULE, }; static void init_target_config_group(struct netconsole_target *nt, const char *name) { config_group_init_type_name(&nt->group, name, &netconsole_target_type); config_group_init_type_name(&nt->userdata_group, "userdata", &userdata_type); configfs_add_default_group(&nt->userdata_group, &nt->group); } static struct netconsole_target *find_cmdline_target(const char *name) { struct netconsole_target *nt, *ret = NULL; unsigned long flags; spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) { if (!strcmp(nt->group.cg_item.ci_name, name)) { ret = nt; break; } } spin_unlock_irqrestore(&target_list_lock, flags); return ret; } /* * Group operations and type for netconsole_subsys. */ static struct config_group *make_netconsole_target(struct config_group *group, const char *name) { struct netconsole_target *nt; unsigned long flags; /* Checking if a target by this name was created at boot time. If so, * attach a configfs entry to that target. This enables dynamic * control. */ if (!strncmp(name, NETCONSOLE_PARAM_TARGET_PREFIX, strlen(NETCONSOLE_PARAM_TARGET_PREFIX))) { nt = find_cmdline_target(name); if (nt) { init_target_config_group(nt, name); return &nt->group; } } nt = alloc_and_init(); if (!nt) return ERR_PTR(-ENOMEM); /* Initialize the config_group member */ init_target_config_group(nt, name); /* Adding, but it is disabled */ spin_lock_irqsave(&target_list_lock, flags); list_add(&nt->list, &target_list); spin_unlock_irqrestore(&target_list_lock, flags); return &nt->group; } static void drop_netconsole_target(struct config_group *group, struct config_item *item) { unsigned long flags; struct netconsole_target *nt = to_target(item); spin_lock_irqsave(&target_list_lock, flags); list_del(&nt->list); spin_unlock_irqrestore(&target_list_lock, flags); /* * The target may have never been enabled, or was manually disabled * before being removed so netpoll may have already been cleaned up. */ if (nt->enabled) netpoll_cleanup(&nt->np); config_item_put(&nt->group.cg_item); } static struct configfs_group_operations netconsole_subsys_group_ops = { .make_group = make_netconsole_target, .drop_item = drop_netconsole_target, }; static const struct config_item_type netconsole_subsys_type = { .ct_group_ops = &netconsole_subsys_group_ops, .ct_owner = THIS_MODULE, }; /* The netconsole configfs subsystem */ static struct configfs_subsystem netconsole_subsys = { .su_group = { .cg_item = { .ci_namebuf = "netconsole", .ci_type = &netconsole_subsys_type, }, }, }; static void populate_configfs_item(struct netconsole_target *nt, int cmdline_count) { char target_name[16]; snprintf(target_name, sizeof(target_name), "%s%d", NETCONSOLE_PARAM_TARGET_PREFIX, cmdline_count); init_target_config_group(nt, target_name); } static int sysdata_append_cpu_nr(struct netconsole_target *nt, int offset) { /* Append cpu=%d at extradata_complete after userdata str */ return scnprintf(&nt->extradata_complete[offset], MAX_EXTRADATA_ENTRY_LEN, " cpu=%u\n", raw_smp_processor_id()); } static int sysdata_append_taskname(struct netconsole_target *nt, int offset) { return scnprintf(&nt->extradata_complete[offset], MAX_EXTRADATA_ENTRY_LEN, " taskname=%s\n", current->comm); } static int sysdata_append_release(struct netconsole_target *nt, int offset) { return scnprintf(&nt->extradata_complete[offset], MAX_EXTRADATA_ENTRY_LEN, " release=%s\n", init_utsname()->release); } static int sysdata_append_msgid(struct netconsole_target *nt, int offset) { wrapping_assign_add(nt->msgcounter, 1); return scnprintf(&nt->extradata_complete[offset], MAX_EXTRADATA_ENTRY_LEN, " msgid=%u\n", nt->msgcounter); } /* * prepare_extradata - append sysdata at extradata_complete in runtime * @nt: target to send message to */ static int prepare_extradata(struct netconsole_target *nt) { int extradata_len; /* userdata was appended when configfs write helper was called * by update_userdata(). */ extradata_len = nt->userdata_length; if (!nt->sysdata_fields) goto out; if (nt->sysdata_fields & SYSDATA_CPU_NR) extradata_len += sysdata_append_cpu_nr(nt, extradata_len); if (nt->sysdata_fields & SYSDATA_TASKNAME) extradata_len += sysdata_append_taskname(nt, extradata_len); if (nt->sysdata_fields & SYSDATA_RELEASE) extradata_len += sysdata_append_release(nt, extradata_len); if (nt->sysdata_fields & SYSDATA_MSGID) extradata_len += sysdata_append_msgid(nt, extradata_len); WARN_ON_ONCE(extradata_len > MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS); out: return extradata_len; } #else /* CONFIG_NETCONSOLE_DYNAMIC not set */ static int prepare_extradata(struct netconsole_target *nt) { return 0; } #endif /* CONFIG_NETCONSOLE_DYNAMIC */ /* Handle network interface device notifications */ static int netconsole_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { unsigned long flags; struct netconsole_target *nt, *tmp; struct net_device *dev = netdev_notifier_info_to_dev(ptr); bool stopped = false; if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER || event == NETDEV_RELEASE || event == NETDEV_JOIN)) goto done; mutex_lock(&target_cleanup_list_lock); spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry_safe(nt, tmp, &target_list, list) { netconsole_target_get(nt); if (nt->np.dev == dev) { switch (event) { case NETDEV_CHANGENAME: strscpy(nt->np.dev_name, dev->name, IFNAMSIZ); break; case NETDEV_RELEASE: case NETDEV_JOIN: case NETDEV_UNREGISTER: nt->enabled = false; list_move(&nt->list, &target_cleanup_list); stopped = true; } } netconsole_target_put(nt); } spin_unlock_irqrestore(&target_list_lock, flags); mutex_unlock(&target_cleanup_list_lock); if (stopped) { const char *msg = "had an event"; switch (event) { case NETDEV_UNREGISTER: msg = "unregistered"; break; case NETDEV_RELEASE: msg = "released slaves"; break; case NETDEV_JOIN: msg = "is joining a master device"; break; } pr_info("network logging stopped on interface %s as it %s\n", dev->name, msg); } /* Process target_cleanup_list entries. By the end, target_cleanup_list * should be empty */ netconsole_process_cleanups_core(); done: return NOTIFY_DONE; } static struct notifier_block netconsole_netdev_notifier = { .notifier_call = netconsole_netdev_event, }; /** * send_udp - Wrapper for netpoll_send_udp that counts errors * @nt: target to send message to * @msg: message to send * @len: length of message * * Calls netpoll_send_udp and classifies the return value. If an error * occurred it increments statistics in nt->stats accordingly. * Only calls netpoll_send_udp if CONFIG_NETCONSOLE_DYNAMIC is disabled. */ static void send_udp(struct netconsole_target *nt, const char *msg, int len) { int result = netpoll_send_udp(&nt->np, msg, len); if (IS_ENABLED(CONFIG_NETCONSOLE_DYNAMIC)) { if (result == NET_XMIT_DROP) { u64_stats_update_begin(&nt->stats.syncp); u64_stats_inc(&nt->stats.xmit_drop_count); u64_stats_update_end(&nt->stats.syncp); } else if (result == -ENOMEM) { u64_stats_update_begin(&nt->stats.syncp); u64_stats_inc(&nt->stats.enomem_count); u64_stats_update_end(&nt->stats.syncp); } } } static void send_msg_no_fragmentation(struct netconsole_target *nt, const char *msg, int msg_len, int release_len) { const char *extradata = NULL; const char *release; #ifdef CONFIG_NETCONSOLE_DYNAMIC extradata = nt->extradata_complete; #endif if (release_len) { release = init_utsname()->release; scnprintf(nt->buf, MAX_PRINT_CHUNK, "%s,%s", release, msg); msg_len += release_len; } else { memcpy(nt->buf, msg, msg_len); } if (extradata) msg_len += scnprintf(&nt->buf[msg_len], MAX_PRINT_CHUNK - msg_len, "%s", extradata); send_udp(nt, nt->buf, msg_len); } static void append_release(char *buf) { const char *release; release = init_utsname()->release; scnprintf(buf, MAX_PRINT_CHUNK, "%s,", release); } static void send_fragmented_body(struct netconsole_target *nt, const char *msgbody, int header_len, int msgbody_len, int extradata_len) { int sent_extradata, preceding_bytes; const char *extradata = NULL; int body_len, offset = 0; #ifdef CONFIG_NETCONSOLE_DYNAMIC extradata = nt->extradata_complete; #endif /* body_len represents the number of bytes that will be sent. This is * bigger than MAX_PRINT_CHUNK, thus, it will be split in multiple * packets */ body_len = msgbody_len + extradata_len; /* In each iteration of the while loop below, we send a packet * containing the header and a portion of the body. The body is * composed of two parts: msgbody and extradata. We keep track of how * many bytes have been sent so far using the offset variable, which * ranges from 0 to the total length of the body. */ while (offset < body_len) { int this_header = header_len; bool msgbody_written = false; int this_offset = 0; int this_chunk = 0; this_header += scnprintf(nt->buf + this_header, MAX_PRINT_CHUNK - this_header, ",ncfrag=%d/%d;", offset, body_len); /* Not all msgbody data has been written yet */ if (offset < msgbody_len) { this_chunk = min(msgbody_len - offset, MAX_PRINT_CHUNK - this_header); if (WARN_ON_ONCE(this_chunk <= 0)) return; memcpy(nt->buf + this_header, msgbody + offset, this_chunk); this_offset += this_chunk; } /* msgbody was finally written, either in the previous * messages and/or in the current buf. Time to write * the extradata. */ msgbody_written |= offset + this_offset >= msgbody_len; /* Msg body is fully written and there is pending extradata to * write, append extradata in this chunk */ if (msgbody_written && offset + this_offset < body_len) { /* Track how much user data was already sent. First * time here, sent_userdata is zero */ sent_extradata = (offset + this_offset) - msgbody_len; /* offset of bytes used in current buf */ preceding_bytes = this_chunk + this_header; if (WARN_ON_ONCE(sent_extradata < 0)) return; this_chunk = min(extradata_len - sent_extradata, MAX_PRINT_CHUNK - preceding_bytes); if (WARN_ON_ONCE(this_chunk < 0)) /* this_chunk could be zero if all the previous * message used all the buffer. This is not a * problem, extradata will be sent in the next * iteration */ return; memcpy(nt->buf + this_header + this_offset, extradata + sent_extradata, this_chunk); this_offset += this_chunk; } send_udp(nt, nt->buf, this_header + this_offset); offset += this_offset; } } static void send_msg_fragmented(struct netconsole_target *nt, const char *msg, int msg_len, int release_len, int extradata_len) { int header_len, msgbody_len; const char *msgbody; /* need to insert extra header fields, detect header and msgbody */ msgbody = memchr(msg, ';', msg_len); if (WARN_ON_ONCE(!msgbody)) return; header_len = msgbody - msg; msgbody_len = msg_len - header_len - 1; msgbody++; /* * Transfer multiple chunks with the following extra header. * "ncfrag=<byte-offset>/<total-bytes>" */ if (release_len) append_release(nt->buf); /* Copy the header into the buffer */ memcpy(nt->buf + release_len, msg, header_len); header_len += release_len; /* for now on, the header will be persisted, and the msgbody * will be replaced */ send_fragmented_body(nt, msgbody, header_len, msgbody_len, extradata_len); } /** * send_ext_msg_udp - send extended log message to target * @nt: target to send message to * @msg: extended log message to send * @msg_len: length of message * * Transfer extended log @msg to @nt. If @msg is longer than * MAX_PRINT_CHUNK, it'll be split and transmitted in multiple chunks with * ncfrag header field added to identify them. */ static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg, int msg_len) { int release_len = 0; int extradata_len; extradata_len = prepare_extradata(nt); if (nt->release) release_len = strlen(init_utsname()->release) + 1; if (msg_len + release_len + extradata_len <= MAX_PRINT_CHUNK) return send_msg_no_fragmentation(nt, msg, msg_len, release_len); return send_msg_fragmented(nt, msg, msg_len, release_len, extradata_len); } static void write_ext_msg(struct console *con, const char *msg, unsigned int len) { struct netconsole_target *nt; unsigned long flags; if ((oops_only && !oops_in_progress) || list_empty(&target_list)) return; spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) if (nt->extended && nt->enabled && netif_running(nt->np.dev)) send_ext_msg_udp(nt, msg, len); spin_unlock_irqrestore(&target_list_lock, flags); } static void write_msg(struct console *con, const char *msg, unsigned int len) { int frag, left; unsigned long flags; struct netconsole_target *nt; const char *tmp; if (oops_only && !oops_in_progress) return; /* Avoid taking lock and disabling interrupts unnecessarily */ if (list_empty(&target_list)) return; spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) { if (!nt->extended && nt->enabled && netif_running(nt->np.dev)) { /* * We nest this inside the for-each-target loop above * so that we're able to get as much logging out to * at least one target if we die inside here, instead * of unnecessarily keeping all targets in lock-step. */ tmp = msg; for (left = len; left;) { frag = min(left, MAX_PRINT_CHUNK); send_udp(nt, tmp, frag); tmp += frag; left -= frag; } } } spin_unlock_irqrestore(&target_list_lock, flags); } static int netconsole_parser_cmdline(struct netpoll *np, char *opt) { bool ipversion_set = false; char *cur = opt; char *delim; int ipv6; if (*cur != '@') { delim = strchr(cur, '@'); if (!delim) goto parse_failed; *delim = 0; if (kstrtou16(cur, 10, &np->local_port)) goto parse_failed; cur = delim; } cur++; if (*cur != '/') { ipversion_set = true; delim = strchr(cur, '/'); if (!delim) goto parse_failed; *delim = 0; ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip); if (ipv6 < 0) goto parse_failed; else np->ipv6 = (bool)ipv6; cur = delim; } cur++; if (*cur != ',') { /* parse out dev_name or dev_mac */ delim = strchr(cur, ','); if (!delim) goto parse_failed; *delim = 0; np->dev_name[0] = '\0'; eth_broadcast_addr(np->dev_mac); if (!strchr(cur, ':')) strscpy(np->dev_name, cur, sizeof(np->dev_name)); else if (!mac_pton(cur, np->dev_mac)) goto parse_failed; cur = delim; } cur++; if (*cur != '@') { /* dst port */ delim = strchr(cur, '@'); if (!delim) goto parse_failed; *delim = 0; if (*cur == ' ' || *cur == '\t') np_info(np, "warning: whitespace is not allowed\n"); if (kstrtou16(cur, 10, &np->remote_port)) goto parse_failed; cur = delim; } cur++; /* dst ip */ delim = strchr(cur, '/'); if (!delim) goto parse_failed; *delim = 0; ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); if (ipv6 < 0) goto parse_failed; else if (ipversion_set && np->ipv6 != (bool)ipv6) goto parse_failed; else np->ipv6 = (bool)ipv6; cur = delim + 1; if (*cur != 0) { /* MAC address */ if (!mac_pton(cur, np->remote_mac)) goto parse_failed; } netconsole_print_banner(np); return 0; parse_failed: np_info(np, "couldn't parse config at '%s'!\n", cur); return -1; } /* Allocate new target (from boot/module param) and setup netpoll for it */ static struct netconsole_target *alloc_param_target(char *target_config, int cmdline_count) { struct netconsole_target *nt; int err; nt = alloc_and_init(); if (!nt) { err = -ENOMEM; goto fail; } if (*target_config == '+') { nt->extended = true; target_config++; } if (*target_config == 'r') { if (!nt->extended) { pr_err("Netconsole configuration error. Release feature requires extended log message"); err = -EINVAL; goto fail; } nt->release = true; target_config++; } /* Parse parameters and setup netpoll */ err = netconsole_parser_cmdline(&nt->np, target_config); if (err) goto fail; err = netpoll_setup(&nt->np); if (err) { pr_err("Not enabling netconsole for %s%d. Netpoll setup failed\n", NETCONSOLE_PARAM_TARGET_PREFIX, cmdline_count); if (!IS_ENABLED(CONFIG_NETCONSOLE_DYNAMIC)) /* only fail if dynamic reconfiguration is set, * otherwise, keep the target in the list, but disabled. */ goto fail; } else { nt->enabled = true; } populate_configfs_item(nt, cmdline_count); return nt; fail: kfree(nt); return ERR_PTR(err); } /* Cleanup netpoll for given target (from boot/module param) and free it */ static void free_param_target(struct netconsole_target *nt) { netpoll_cleanup(&nt->np); kfree(nt); } static struct console netconsole_ext = { .name = "netcon_ext", .flags = CON_ENABLED | CON_EXTENDED, .write = write_ext_msg, }; static struct console netconsole = { .name = "netcon", .flags = CON_ENABLED, .write = write_msg, }; static int __init init_netconsole(void) { int err; struct netconsole_target *nt, *tmp; u32 console_type_needed = 0; unsigned int count = 0; unsigned long flags; char *target_config; char *input = config; if (strnlen(input, MAX_PARAM_LENGTH)) { while ((target_config = strsep(&input, ";"))) { nt = alloc_param_target(target_config, count); if (IS_ERR(nt)) { if (IS_ENABLED(CONFIG_NETCONSOLE_DYNAMIC)) continue; err = PTR_ERR(nt); goto fail; } /* Dump existing printks when we register */ if (nt->extended) { console_type_needed |= CONS_EXTENDED; netconsole_ext.flags |= CON_PRINTBUFFER; } else { console_type_needed |= CONS_BASIC; netconsole.flags |= CON_PRINTBUFFER; } spin_lock_irqsave(&target_list_lock, flags); list_add(&nt->list, &target_list); spin_unlock_irqrestore(&target_list_lock, flags); count++; } } err = register_netdevice_notifier(&netconsole_netdev_notifier); if (err) goto fail; err = dynamic_netconsole_init(); if (err) goto undonotifier; if (console_type_needed & CONS_EXTENDED) register_console(&netconsole_ext); if (console_type_needed & CONS_BASIC) register_console(&netconsole); pr_info("network logging started\n"); return err; undonotifier: unregister_netdevice_notifier(&netconsole_netdev_notifier); fail: pr_err("cleaning up\n"); /* * Remove all targets and destroy them (only targets created * from the boot/module option exist here). Skipping the list * lock is safe here, and netpoll_cleanup() will sleep. */ list_for_each_entry_safe(nt, tmp, &target_list, list) { list_del(&nt->list); free_param_target(nt); } return err; } static void __exit cleanup_netconsole(void) { struct netconsole_target *nt, *tmp; if (console_is_registered(&netconsole_ext)) unregister_console(&netconsole_ext); if (console_is_registered(&netconsole)) unregister_console(&netconsole); dynamic_netconsole_exit(); unregister_netdevice_notifier(&netconsole_netdev_notifier); /* * Targets created via configfs pin references on our module * and would first be rmdir(2)'ed from userspace. We reach * here only when they are already destroyed, and only those * created from the boot/module option are left, so remove and * destroy them. Skipping the list lock is safe here, and * netpoll_cleanup() will sleep. */ list_for_each_entry_safe(nt, tmp, &target_list, list) { list_del(&nt->list); free_param_target(nt); } } /* * Use late_initcall to ensure netconsole is * initialized after network device driver if built-in. * * late_initcall() and module_init() are identical if built as module. */ late_initcall(init_netconsole); module_exit(cleanup_netconsole);
7 1476 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SHRINKER_H #define _LINUX_SHRINKER_H #include <linux/atomic.h> #include <linux/types.h> #include <linux/refcount.h> #include <linux/completion.h> #define SHRINKER_UNIT_BITS BITS_PER_LONG /* * Bitmap and deferred work of shrinker::id corresponding to memcg-aware * shrinkers, which have elements charged to the memcg. */ struct shrinker_info_unit { atomic_long_t nr_deferred[SHRINKER_UNIT_BITS]; DECLARE_BITMAP(map, SHRINKER_UNIT_BITS); }; struct shrinker_info { struct rcu_head rcu; int map_nr_max; struct shrinker_info_unit *unit[]; }; /* * This struct is used to pass information from page reclaim to the shrinkers. * We consolidate the values for easier extension later. * * The 'gfpmask' refers to the allocation we are currently trying to * fulfil. */ struct shrink_control { gfp_t gfp_mask; /* current node being shrunk (for NUMA aware shrinkers) */ int nid; /* * How many objects scan_objects should scan and try to reclaim. * This is reset before every call, so it is safe for callees * to modify. */ unsigned long nr_to_scan; /* * How many objects did scan_objects process? * This defaults to nr_to_scan before every call, but the callee * should track its actual progress. */ unsigned long nr_scanned; /* current memcg being shrunk (for memcg aware shrinkers) */ struct mem_cgroup *memcg; }; #define SHRINK_STOP (~0UL) #define SHRINK_EMPTY (~0UL - 1) /* * A callback you can register to apply pressure to ageable caches. * * @count_objects should return the number of freeable items in the cache. If * there are no objects to free, it should return SHRINK_EMPTY, while 0 is * returned in cases of the number of freeable items cannot be determined * or shrinker should skip this cache for this time (e.g., their number * is below shrinkable limit). No deadlock checks should be done during the * count callback - the shrinker relies on aggregating scan counts that couldn't * be executed due to potential deadlocks to be run at a later call when the * deadlock condition is no longer pending. * * @scan_objects will only be called if @count_objects returned a non-zero * value for the number of freeable objects. The callout should scan the cache * and attempt to free items from the cache. It should then return the number * of objects freed during the scan, or SHRINK_STOP if progress cannot be made * due to potential deadlocks. If SHRINK_STOP is returned, then no further * attempts to call the @scan_objects will be made from the current reclaim * context. * * @flags determine the shrinker abilities, like numa awareness */ struct shrinker { unsigned long (*count_objects)(struct shrinker *, struct shrink_control *sc); unsigned long (*scan_objects)(struct shrinker *, struct shrink_control *sc); long batch; /* reclaim batch size, 0 = default */ int seeks; /* seeks to recreate an obj */ unsigned flags; /* * The reference count of this shrinker. Registered shrinker have an * initial refcount of 1, then the lookup operations are now allowed * to use it via shrinker_try_get(). Later in the unregistration step, * the initial refcount will be discarded, and will free the shrinker * asynchronously via RCU after its refcount reaches 0. */ refcount_t refcount; struct completion done; /* use to wait for refcount to reach 0 */ struct rcu_head rcu; void *private_data; /* These are for internal use */ struct list_head list; #ifdef CONFIG_MEMCG /* ID in shrinker_idr */ int id; #endif #ifdef CONFIG_SHRINKER_DEBUG int debugfs_id; const char *name; struct dentry *debugfs_entry; #endif /* objs pending delete, per node */ atomic_long_t *nr_deferred; }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ /* Internal flags */ #define SHRINKER_REGISTERED BIT(0) #define SHRINKER_ALLOCATED BIT(1) /* Flags for users to use */ #define SHRINKER_NUMA_AWARE BIT(2) #define SHRINKER_MEMCG_AWARE BIT(3) /* * It just makes sense when the shrinker is also MEMCG_AWARE for now, * non-MEMCG_AWARE shrinker should not have this flag set. */ #define SHRINKER_NONSLAB BIT(4) __printf(2, 3) struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); void shrinker_register(struct shrinker *shrinker); void shrinker_free(struct shrinker *shrinker); static inline bool shrinker_try_get(struct shrinker *shrinker) { return refcount_inc_not_zero(&shrinker->refcount); } static inline void shrinker_put(struct shrinker *shrinker) { if (refcount_dec_and_test(&shrinker->refcount)) complete(&shrinker->done); } #ifdef CONFIG_SHRINKER_DEBUG extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...); #else /* CONFIG_SHRINKER_DEBUG */ static inline __printf(2, 3) int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) { return 0; } #endif /* CONFIG_SHRINKER_DEBUG */ #endif /* _LINUX_SHRINKER_H */
34 34 6 6 6 6 2 1 1 1 2 2 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 /* * net/tipc/monitor.c * * Copyright (c) 2016, Ericsson AB * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <net/genetlink.h> #include "core.h" #include "addr.h" #include "monitor.h" #include "bearer.h" #define MAX_MON_DOMAIN 64 #define MON_TIMEOUT 120000 #define MAX_PEER_DOWN_EVENTS 4 /* struct tipc_mon_domain: domain record to be transferred between peers * @len: actual size of domain record * @gen: current generation of sender's domain * @ack_gen: most recent generation of self's domain acked by peer * @member_cnt: number of domain member nodes described in this record * @up_map: bit map indicating which of the members the sender considers up * @members: identity of the domain members */ struct tipc_mon_domain { u16 len; u16 gen; u16 ack_gen; u16 member_cnt; u64 up_map; u32 members[MAX_MON_DOMAIN]; }; /* struct tipc_peer: state of a peer node and its domain * @addr: tipc node identity of peer * @head_map: shows which other nodes currently consider peer 'up' * @domain: most recent domain record from peer * @hash: position in hashed lookup list * @list: position in linked list, in circular ascending order by 'addr' * @applied: number of reported domain members applied on this monitor list * @is_up: peer is up as seen from this node * @is_head: peer is assigned domain head as seen from this node * @is_local: peer is in local domain and should be continuously monitored * @down_cnt: - numbers of other peers which have reported this on lost */ struct tipc_peer { u32 addr; struct tipc_mon_domain *domain; struct hlist_node hash; struct list_head list; u8 applied; u8 down_cnt; bool is_up; bool is_head; bool is_local; }; struct tipc_monitor { struct hlist_head peers[NODE_HTABLE_SIZE]; int peer_cnt; struct tipc_peer *self; rwlock_t lock; struct tipc_mon_domain cache; u16 list_gen; u16 dom_gen; struct net *net; struct timer_list timer; unsigned long timer_intv; }; static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id) { return tipc_net(net)->monitors[bearer_id]; } const int tipc_max_domain_size = sizeof(struct tipc_mon_domain); static inline u16 mon_cpu_to_le16(u16 val) { return (__force __u16)htons(val); } static inline u32 mon_cpu_to_le32(u32 val) { return (__force __u32)htonl(val); } static inline u64 mon_cpu_to_le64(u64 val) { return (__force __u64)cpu_to_be64(val); } static inline u16 mon_le16_to_cpu(u16 val) { return ntohs((__force __be16)val); } static inline u32 mon_le32_to_cpu(u32 val) { return ntohl((__force __be32)val); } static inline u64 mon_le64_to_cpu(u64 val) { return be64_to_cpu((__force __be64)val); } /* dom_rec_len(): actual length of domain record for transport */ static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt) { return (offsetof(struct tipc_mon_domain, members)) + (mcnt * sizeof(u32)); } /* dom_size() : calculate size of own domain based on number of peers */ static int dom_size(int peers) { int i = 0; while ((i * i) < peers) i++; return min(i, MAX_MON_DOMAIN); } static void map_set(u64 *up_map, int i, unsigned int v) { *up_map &= ~(1ULL << i); *up_map |= ((u64)v << i); } static int map_get(u64 up_map, int i) { return (up_map & (1ULL << i)) >> i; } static struct tipc_peer *peer_prev(struct tipc_peer *peer) { return list_last_entry(&peer->list, struct tipc_peer, list); } static struct tipc_peer *peer_nxt(struct tipc_peer *peer) { return list_first_entry(&peer->list, struct tipc_peer, list); } static struct tipc_peer *peer_head(struct tipc_peer *peer) { while (!peer->is_head) peer = peer_prev(peer); return peer; } static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr) { struct tipc_peer *peer; unsigned int thash = tipc_hashfn(addr); hlist_for_each_entry(peer, &mon->peers[thash], hash) { if (peer->addr == addr) return peer; } return NULL; } static struct tipc_peer *get_self(struct net *net, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); return mon->self; } static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon) { struct tipc_net *tn = tipc_net(net); return mon->peer_cnt > tn->mon_threshold; } /* mon_identify_lost_members() : - identify amd mark potentially lost members */ static void mon_identify_lost_members(struct tipc_peer *peer, struct tipc_mon_domain *dom_bef, int applied_bef) { struct tipc_peer *member = peer; struct tipc_mon_domain *dom_aft = peer->domain; int applied_aft = peer->applied; int i; for (i = 0; i < applied_bef; i++) { member = peer_nxt(member); /* Do nothing if self or peer already see member as down */ if (!member->is_up || !map_get(dom_bef->up_map, i)) continue; /* Loss of local node must be detected by active probing */ if (member->is_local) continue; /* Start probing if member was removed from applied domain */ if (!applied_aft || (applied_aft < i)) { member->down_cnt = 1; continue; } /* Member loss is confirmed if it is still in applied domain */ if (!map_get(dom_aft->up_map, i)) member->down_cnt++; } } /* mon_apply_domain() : match a peer's domain record against monitor list */ static void mon_apply_domain(struct tipc_monitor *mon, struct tipc_peer *peer) { struct tipc_mon_domain *dom = peer->domain; struct tipc_peer *member; u32 addr; int i; if (!dom || !peer->is_up) return; /* Scan across domain members and match against monitor list */ peer->applied = 0; member = peer_nxt(peer); for (i = 0; i < dom->member_cnt; i++) { addr = dom->members[i]; if (addr != member->addr) return; peer->applied++; member = peer_nxt(member); } } /* mon_update_local_domain() : update after peer addition/removal/up/down */ static void mon_update_local_domain(struct tipc_monitor *mon) { struct tipc_peer *self = mon->self; struct tipc_mon_domain *cache = &mon->cache; struct tipc_mon_domain *dom = self->domain; struct tipc_peer *peer = self; u64 prev_up_map = dom->up_map; u16 member_cnt, i; bool diff; /* Update local domain size based on current size of cluster */ member_cnt = dom_size(mon->peer_cnt) - 1; self->applied = member_cnt; /* Update native and cached outgoing local domain records */ dom->len = dom_rec_len(dom, member_cnt); diff = dom->member_cnt != member_cnt; dom->member_cnt = member_cnt; for (i = 0; i < member_cnt; i++) { peer = peer_nxt(peer); diff |= dom->members[i] != peer->addr; dom->members[i] = peer->addr; map_set(&dom->up_map, i, peer->is_up); cache->members[i] = mon_cpu_to_le32(peer->addr); } diff |= dom->up_map != prev_up_map; if (!diff) return; dom->gen = ++mon->dom_gen; cache->len = mon_cpu_to_le16(dom->len); cache->gen = mon_cpu_to_le16(dom->gen); cache->member_cnt = mon_cpu_to_le16(member_cnt); cache->up_map = mon_cpu_to_le64(dom->up_map); mon_apply_domain(mon, self); } /* mon_update_neighbors() : update preceding neighbors of added/removed peer */ static void mon_update_neighbors(struct tipc_monitor *mon, struct tipc_peer *peer) { int dz, i; dz = dom_size(mon->peer_cnt); for (i = 0; i < dz; i++) { mon_apply_domain(mon, peer); peer = peer_prev(peer); } } /* mon_assign_roles() : reassign peer roles after a network change * The monitor list is consistent at this stage; i.e., each peer is monitoring * a set of domain members as matched between domain record and the monitor list */ static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head) { struct tipc_peer *peer = peer_nxt(head); struct tipc_peer *self = mon->self; int i = 0; for (; peer != self; peer = peer_nxt(peer)) { peer->is_local = false; /* Update domain member */ if (i++ < head->applied) { peer->is_head = false; if (head == self) peer->is_local = true; continue; } /* Assign next domain head */ if (!peer->is_up) continue; if (peer->is_head) break; head = peer; head->is_head = true; i = 0; } mon->list_gen++; } void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *self; struct tipc_peer *peer, *prev, *head; if (!mon) return; self = get_self(net, bearer_id); write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer) goto exit; prev = peer_prev(peer); list_del(&peer->list); hlist_del(&peer->hash); kfree(peer->domain); kfree(peer); mon->peer_cnt--; head = peer_head(prev); if (head == self) mon_update_local_domain(mon); mon_update_neighbors(mon, prev); /* Revert to full-mesh monitoring if we reach threshold */ if (!tipc_mon_is_active(net, mon)) { list_for_each_entry(peer, &self->list, list) { kfree(peer->domain); peer->domain = NULL; peer->applied = 0; } } mon_assign_roles(mon, head); exit: write_unlock_bh(&mon->lock); } static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr, struct tipc_peer **peer) { struct tipc_peer *self = mon->self; struct tipc_peer *cur, *prev, *p; p = kzalloc(sizeof(*p), GFP_ATOMIC); *peer = p; if (!p) return false; p->addr = addr; /* Add new peer to lookup list */ INIT_LIST_HEAD(&p->list); hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]); /* Sort new peer into iterator list, in ascending circular order */ prev = self; list_for_each_entry(cur, &self->list, list) { if ((addr > prev->addr) && (addr < cur->addr)) break; if (((addr < cur->addr) || (addr > prev->addr)) && (prev->addr > cur->addr)) break; prev = cur; } list_add_tail(&p->list, &cur->list); mon->peer_cnt++; mon_update_neighbors(mon, p); return true; } void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *self = get_self(net, bearer_id); struct tipc_peer *peer, *head; write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer && !tipc_mon_add_peer(mon, addr, &peer)) goto exit; peer->is_up = true; head = peer_head(peer); if (head == self) mon_update_local_domain(mon); mon_assign_roles(mon, head); exit: write_unlock_bh(&mon->lock); } void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *self; struct tipc_peer *peer, *head; struct tipc_mon_domain *dom; int applied; if (!mon) return; self = get_self(net, bearer_id); write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer) { pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id); goto exit; } applied = peer->applied; peer->applied = 0; dom = peer->domain; peer->domain = NULL; if (peer->is_head) mon_identify_lost_members(peer, dom, applied); kfree(dom); peer->is_up = false; peer->is_head = false; peer->is_local = false; peer->down_cnt = 0; head = peer_head(peer); if (head == self) mon_update_local_domain(mon); mon_assign_roles(mon, head); exit: write_unlock_bh(&mon->lock); } /* tipc_mon_rcv - process monitor domain event message */ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, struct tipc_mon_state *state, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_mon_domain *arrv_dom = data; struct tipc_mon_domain dom_bef; struct tipc_mon_domain *dom; struct tipc_peer *peer; u16 new_member_cnt = mon_le16_to_cpu(arrv_dom->member_cnt); int new_dlen = dom_rec_len(arrv_dom, new_member_cnt); u16 new_gen = mon_le16_to_cpu(arrv_dom->gen); u16 acked_gen = mon_le16_to_cpu(arrv_dom->ack_gen); u16 arrv_dlen = mon_le16_to_cpu(arrv_dom->len); bool probing = state->probing; int i, applied_bef; state->probing = false; /* Sanity check received domain record */ if (new_member_cnt > MAX_MON_DOMAIN) return; if (dlen < dom_rec_len(arrv_dom, 0)) return; if (dlen != dom_rec_len(arrv_dom, new_member_cnt)) return; if (dlen < new_dlen || arrv_dlen != new_dlen) return; /* Synch generation numbers with peer if link just came up */ if (!state->synched) { state->peer_gen = new_gen - 1; state->acked_gen = acked_gen; state->synched = true; } if (more(acked_gen, state->acked_gen)) state->acked_gen = acked_gen; /* Drop duplicate unless we are waiting for a probe response */ if (!more(new_gen, state->peer_gen) && !probing) return; write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer || !peer->is_up) goto exit; /* Peer is confirmed, stop any ongoing probing */ peer->down_cnt = 0; /* Task is done for duplicate record */ if (!more(new_gen, state->peer_gen)) goto exit; state->peer_gen = new_gen; /* Cache current domain record for later use */ dom_bef.member_cnt = 0; dom = peer->domain; if (dom) memcpy(&dom_bef, dom, dom->len); /* Transform and store received domain record */ if (!dom || (dom->len < new_dlen)) { kfree(dom); dom = kmalloc(new_dlen, GFP_ATOMIC); peer->domain = dom; if (!dom) goto exit; } dom->len = new_dlen; dom->gen = new_gen; dom->member_cnt = new_member_cnt; dom->up_map = mon_le64_to_cpu(arrv_dom->up_map); for (i = 0; i < new_member_cnt; i++) dom->members[i] = mon_le32_to_cpu(arrv_dom->members[i]); /* Update peers affected by this domain record */ applied_bef = peer->applied; mon_apply_domain(mon, peer); mon_identify_lost_members(peer, &dom_bef, applied_bef); mon_assign_roles(mon, peer_head(peer)); exit: write_unlock_bh(&mon->lock); } void tipc_mon_prep(struct net *net, void *data, int *dlen, struct tipc_mon_state *state, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_mon_domain *dom = data; u16 gen = mon->dom_gen; u16 len; /* Send invalid record if not active */ if (!tipc_mon_is_active(net, mon)) { dom->len = 0; return; } /* Send only a dummy record with ack if peer has acked our last sent */ if (likely(state->acked_gen == gen)) { len = dom_rec_len(dom, 0); *dlen = len; dom->len = mon_cpu_to_le16(len); dom->gen = mon_cpu_to_le16(gen); dom->ack_gen = mon_cpu_to_le16(state->peer_gen); dom->member_cnt = 0; return; } /* Send the full record */ read_lock_bh(&mon->lock); len = mon_le16_to_cpu(mon->cache.len); *dlen = len; memcpy(data, &mon->cache, len); read_unlock_bh(&mon->lock); dom->ack_gen = mon_cpu_to_le16(state->peer_gen); } void tipc_mon_get_state(struct net *net, u32 addr, struct tipc_mon_state *state, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *peer; if (!tipc_mon_is_active(net, mon)) { state->probing = false; state->monitoring = true; return; } /* Used cached state if table has not changed */ if (!state->probing && (state->list_gen == mon->list_gen) && (state->acked_gen == mon->dom_gen)) return; read_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (peer) { state->probing = state->acked_gen != mon->dom_gen; state->probing |= peer->down_cnt; state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS; state->monitoring = peer->is_local; state->monitoring |= peer->is_head; state->list_gen = mon->list_gen; } read_unlock_bh(&mon->lock); } static void mon_timeout(struct timer_list *t) { struct tipc_monitor *mon = timer_container_of(mon, t, timer); struct tipc_peer *self; int best_member_cnt = dom_size(mon->peer_cnt) - 1; write_lock_bh(&mon->lock); self = mon->self; if (self && (best_member_cnt != self->applied)) { mon_update_local_domain(mon); mon_assign_roles(mon, self); } write_unlock_bh(&mon->lock); mod_timer(&mon->timer, jiffies + mon->timer_intv); } int tipc_mon_create(struct net *net, int bearer_id) { struct tipc_net *tn = tipc_net(net); struct tipc_monitor *mon; struct tipc_peer *self; struct tipc_mon_domain *dom; if (tn->monitors[bearer_id]) return 0; mon = kzalloc(sizeof(*mon), GFP_ATOMIC); self = kzalloc(sizeof(*self), GFP_ATOMIC); dom = kzalloc(sizeof(*dom), GFP_ATOMIC); if (!mon || !self || !dom) { kfree(mon); kfree(self); kfree(dom); return -ENOMEM; } tn->monitors[bearer_id] = mon; rwlock_init(&mon->lock); mon->net = net; mon->peer_cnt = 1; mon->self = self; self->domain = dom; self->addr = tipc_own_addr(net); self->is_up = true; self->is_head = true; INIT_LIST_HEAD(&self->list); timer_setup(&mon->timer, mon_timeout, 0); mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff)); mod_timer(&mon->timer, jiffies + mon->timer_intv); return 0; } void tipc_mon_delete(struct net *net, int bearer_id) { struct tipc_net *tn = tipc_net(net); struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *self; struct tipc_peer *peer, *tmp; if (!mon) return; self = get_self(net, bearer_id); write_lock_bh(&mon->lock); tn->monitors[bearer_id] = NULL; list_for_each_entry_safe(peer, tmp, &self->list, list) { list_del(&peer->list); hlist_del(&peer->hash); kfree(peer->domain); kfree(peer); } mon->self = NULL; write_unlock_bh(&mon->lock); timer_shutdown_sync(&mon->timer); kfree(self->domain); kfree(self); kfree(mon); } void tipc_mon_reinit_self(struct net *net) { struct tipc_monitor *mon; int bearer_id; for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { mon = tipc_monitor(net, bearer_id); if (!mon) continue; write_lock_bh(&mon->lock); if (mon->self) mon->self->addr = tipc_own_addr(net); write_unlock_bh(&mon->lock); } } int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) { struct tipc_net *tn = tipc_net(net); if (cluster_size > TIPC_CLUSTER_SIZE) return -EINVAL; tn->mon_threshold = cluster_size; return 0; } int tipc_nl_monitor_get_threshold(struct net *net) { struct tipc_net *tn = tipc_net(net); return tn->mon_threshold; } static int __tipc_nl_add_monitor_peer(struct tipc_peer *peer, struct tipc_nl_msg *msg) { struct tipc_mon_domain *dom = peer->domain; struct nlattr *attrs; void *hdr; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_MON_PEER_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON_PEER); if (!attrs) goto msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_ADDR, peer->addr)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_APPLIED, peer->applied)) goto attr_msg_full; if (peer->is_up) if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_UP)) goto attr_msg_full; if (peer->is_local) if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_LOCAL)) goto attr_msg_full; if (peer->is_head) if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_HEAD)) goto attr_msg_full; if (dom) { if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_DOMGEN, dom->gen)) goto attr_msg_full; if (nla_put_u64_64bit(msg->skb, TIPC_NLA_MON_PEER_UPMAP, dom->up_map, TIPC_NLA_MON_PEER_PAD)) goto attr_msg_full; if (nla_put(msg->skb, TIPC_NLA_MON_PEER_MEMBERS, dom->member_cnt * sizeof(u32), &dom->members)) goto attr_msg_full; } nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, u32 bearer_id, u32 *prev_node) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *peer; if (!mon) return -EINVAL; read_lock_bh(&mon->lock); peer = mon->self; do { if (*prev_node) { if (peer->addr == *prev_node) *prev_node = 0; else continue; } if (__tipc_nl_add_monitor_peer(peer, msg)) { *prev_node = peer->addr; read_unlock_bh(&mon->lock); return -EMSGSIZE; } } while ((peer = peer_nxt(peer)) != mon->self); read_unlock_bh(&mon->lock); return 0; } int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, u32 bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); char bearer_name[TIPC_MAX_BEARER_NAME]; struct nlattr *attrs; void *hdr; int ret; ret = tipc_bearer_get_name(net, bearer_name, bearer_id); if (ret || !mon) return 0; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_MON_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON); if (!attrs) goto msg_full; read_lock_bh(&mon->lock); if (nla_put_u32(msg->skb, TIPC_NLA_MON_REF, bearer_id)) goto attr_msg_full; if (tipc_mon_is_active(net, mon)) if (nla_put_flag(msg->skb, TIPC_NLA_MON_ACTIVE)) goto attr_msg_full; if (nla_put_string(msg->skb, TIPC_NLA_MON_BEARER_NAME, bearer_name)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEERCNT, mon->peer_cnt)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_MON_LISTGEN, mon->list_gen)) goto attr_msg_full; read_unlock_bh(&mon->lock); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; attr_msg_full: read_unlock_bh(&mon->lock); nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; }
602 333 7539 1996 412 5574 297 2 1 1 1 1 1 5 416 201 97 920 37 945 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_DCACHE_H #define __LINUX_DCACHE_H #include <linux/atomic.h> #include <linux/list.h> #include <linux/math.h> #include <linux/rculist.h> #include <linux/rculist_bl.h> #include <linux/spinlock.h> #include <linux/seqlock.h> #include <linux/cache.h> #include <linux/rcupdate.h> #include <linux/lockref.h> #include <linux/stringhash.h> #include <linux/wait.h> struct path; struct file; struct vfsmount; /* * linux/include/linux/dcache.h * * Dirent cache data structures * * (C) Copyright 1997 Thomas Schoebel-Theuer, * with heavy changes by Linus Torvalds */ #define IS_ROOT(x) ((x) == (x)->d_parent) /* The hash is always the low bits of hash_len */ #ifdef __LITTLE_ENDIAN #define HASH_LEN_DECLARE u32 hash; u32 len #define bytemask_from_count(cnt) (~(~0ul << (cnt)*8)) #else #define HASH_LEN_DECLARE u32 len; u32 hash #define bytemask_from_count(cnt) (~(~0ul >> (cnt)*8)) #endif /* * "quick string" -- eases parameter passing, but more importantly * saves "metadata" about the string (ie length and the hash). * * hash comes first so it snuggles against d_parent in the * dentry. */ struct qstr { union { struct { HASH_LEN_DECLARE; }; u64 hash_len; }; const unsigned char *name; }; #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } #define QSTR_LEN(n,l) (struct qstr)QSTR_INIT(n,l) #define QSTR(n) QSTR_LEN(n, strlen(n)) extern const struct qstr empty_name; extern const struct qstr slash_name; extern const struct qstr dotdot_name; /* * Try to keep struct dentry aligned on 64 byte cachelines (this will * give reasonable cacheline footprint with larger lines without the * large memory footprint increase). */ #ifdef CONFIG_64BIT # define DNAME_INLINE_WORDS 5 /* 192 bytes */ #else # ifdef CONFIG_SMP # define DNAME_INLINE_WORDS 9 /* 128 bytes */ # else # define DNAME_INLINE_WORDS 11 /* 128 bytes */ # endif #endif #define DNAME_INLINE_LEN (DNAME_INLINE_WORDS*sizeof(unsigned long)) union shortname_store { unsigned char string[DNAME_INLINE_LEN]; unsigned long words[DNAME_INLINE_WORDS]; }; #define d_lock d_lockref.lock #define d_iname d_shortname.string struct dentry { /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ seqcount_spinlock_t d_seq; /* per dentry seqlock */ struct hlist_bl_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ union { struct qstr __d_name; /* for use ONLY in fs/dcache.c */ const struct qstr d_name; }; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ union shortname_store d_shortname; /* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */ /* Ref lookup also touches following */ const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ unsigned long d_time; /* used by d_revalidate */ void *d_fsdata; /* fs-specific data */ /* --- cacheline 2 boundary (128 bytes) --- */ struct lockref d_lockref; /* per-dentry lock and refcount * keep separate from RCU lookup area if * possible! */ union { struct list_head d_lru; /* LRU list */ wait_queue_head_t *d_wait; /* in-lookup ones only */ }; struct hlist_node d_sib; /* child of parent list */ struct hlist_head d_children; /* our children */ /* * d_alias and d_rcu can share memory */ union { struct hlist_node d_alias; /* inode alias list */ struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */ struct rcu_head d_rcu; } d_u; }; /* * dentry->d_lock spinlock nesting subclasses: * * 0: normal * 1: nested */ enum dentry_d_lock_class { DENTRY_D_LOCK_NORMAL, /* implicitly used by plain spin_lock() APIs. */ DENTRY_D_LOCK_NESTED }; enum d_real_type { D_REAL_DATA, D_REAL_METADATA, }; struct dentry_operations { int (*d_revalidate)(struct inode *, const struct qstr *, struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, unsigned int, const char *, const struct qstr *); int (*d_delete)(const struct dentry *); int (*d_init)(struct dentry *); void (*d_release)(struct dentry *); void (*d_prune)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, enum d_real_type type); bool (*d_unalias_trylock)(const struct dentry *); void (*d_unalias_unlock)(const struct dentry *); } ____cacheline_aligned; /* * Locking rules for dentry_operations callbacks are to be found in * Documentation/filesystems/locking.rst. Keep it updated! * * FUrther descriptions are found in Documentation/filesystems/vfs.rst. * Keep it updated too! */ /* d_flags entries */ enum dentry_flags { DCACHE_OP_HASH = BIT(0), DCACHE_OP_COMPARE = BIT(1), DCACHE_OP_REVALIDATE = BIT(2), DCACHE_OP_DELETE = BIT(3), DCACHE_OP_PRUNE = BIT(4), /* * This dentry is possibly not currently connected to the dcache tree, * in which case its parent will either be itself, or will have this * flag as well. nfsd will not use a dentry with this bit set, but will * first endeavour to clear the bit either by discovering that it is * connected, or by performing lookup operations. Any filesystem which * supports nfsd_operations MUST have a lookup function which, if it * finds a directory inode with a DCACHE_DISCONNECTED dentry, will * d_move that dentry into place and return that dentry rather than the * passed one, typically using d_splice_alias. */ DCACHE_DISCONNECTED = BIT(5), DCACHE_REFERENCED = BIT(6), /* Recently used, don't discard. */ DCACHE_DONTCACHE = BIT(7), /* Purge from memory on final dput() */ DCACHE_CANT_MOUNT = BIT(8), DCACHE_GENOCIDE = BIT(9), DCACHE_SHRINK_LIST = BIT(10), DCACHE_OP_WEAK_REVALIDATE = BIT(11), /* * this dentry has been "silly renamed" and has to be deleted on the * last dput() */ DCACHE_NFSFS_RENAMED = BIT(12), DCACHE_FSNOTIFY_PARENT_WATCHED = BIT(13), /* Parent inode is watched by some fsnotify listener */ DCACHE_DENTRY_KILLED = BIT(14), DCACHE_MOUNTED = BIT(15), /* is a mountpoint */ DCACHE_NEED_AUTOMOUNT = BIT(16), /* handle automount on this dir */ DCACHE_MANAGE_TRANSIT = BIT(17), /* manage transit from this dirent */ DCACHE_LRU_LIST = BIT(18), DCACHE_ENTRY_TYPE = (7 << 19), /* bits 19..21 are for storing type: */ DCACHE_MISS_TYPE = (0 << 19), /* Negative dentry */ DCACHE_WHITEOUT_TYPE = (1 << 19), /* Whiteout dentry (stop pathwalk) */ DCACHE_DIRECTORY_TYPE = (2 << 19), /* Normal directory */ DCACHE_AUTODIR_TYPE = (3 << 19), /* Lookupless directory (presumed automount) */ DCACHE_REGULAR_TYPE = (4 << 19), /* Regular file type */ DCACHE_SPECIAL_TYPE = (5 << 19), /* Other file type */ DCACHE_SYMLINK_TYPE = (6 << 19), /* Symlink */ DCACHE_NOKEY_NAME = BIT(22), /* Encrypted name encoded without key */ DCACHE_OP_REAL = BIT(23), DCACHE_PAR_LOOKUP = BIT(24), /* being looked up (with parent locked shared) */ DCACHE_DENTRY_CURSOR = BIT(25), DCACHE_NORCU = BIT(26), /* No RCU delay for freeing */ }; #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) extern seqlock_t rename_lock; /* * These are the low-level FS interfaces to the dcache.. */ extern void d_instantiate(struct dentry *, struct inode *); extern void d_instantiate_new(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_anon(struct super_block *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, wait_queue_head_t *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); /* weird procfs mess; *NOT* exported */ extern struct dentry * d_splice_alias_ops(struct inode *, struct dentry *, const struct dentry_operations *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent, const struct qstr *name); extern struct dentry *d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern struct dentry * d_obtain_root(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); extern void d_invalidate(struct dentry *); /* only used at mount-time */ extern struct dentry * d_make_root(struct inode *); extern void d_mark_tmpfile(struct file *, struct inode *); extern void d_tmpfile(struct file *, struct inode *); extern struct dentry *d_find_alias(struct inode *); extern void d_prune_aliases(struct inode *); extern struct dentry *d_find_alias_rcu(struct inode *); /* test whether we have any submounts in a subdir tree */ extern int path_has_submounts(const struct path *); /* * This adds the entry to the hash queues. */ extern void d_rehash(struct dentry *); extern void d_add(struct dentry *, struct inode *); /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); extern void d_exchange(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); extern struct dentry *d_lookup(const struct dentry *, const struct qstr *); static inline unsigned d_count(const struct dentry *dentry) { return dentry->d_lockref.count; } ino_t d_parent_ino(struct dentry *dentry); /* * helper function for dentry_operations.d_dname() members */ extern __printf(3, 4) char *dynamic_dname(char *, int, const char *, ...); extern char *__d_path(const struct path *, const struct path *, char *, int); extern char *d_absolute_path(const struct path *, char *, int); extern char *d_path(const struct path *, char *, int); extern char *dentry_path_raw(const struct dentry *, char *, int); extern char *dentry_path(const struct dentry *, char *, int); /* Allocation counts.. */ /** * dget_dlock - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a live dentry, increment the reference count and return the dentry. * Caller must hold @dentry->d_lock. Making sure that dentry is alive is * caller's resonsibility. There are many conditions sufficient to guarantee * that; e.g. anything with non-negative refcount is alive, so's anything * hashed, anything positive, anyone's parent, etc. */ static inline struct dentry *dget_dlock(struct dentry *dentry) { dentry->d_lockref.count++; return dentry; } /** * dget - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a dentry or %NULL pointer increment the reference count * if appropriate and return the dentry. A dentry will not be * destroyed when it has references. Conversely, a dentry with * no references can disappear for any number of reasons, starting * with memory pressure. In other words, that primitive is * used to clone an existing reference; using it on something with * zero refcount is a bug. * * NOTE: it will spin if @dentry->d_lock is held. From the deadlock * avoidance point of view it is equivalent to spin_lock()/increment * refcount/spin_unlock(), so calling it under @dentry->d_lock is * always a bug; so's calling it under ->d_lock on any of its descendents. * */ static inline struct dentry *dget(struct dentry *dentry) { if (dentry) lockref_get(&dentry->d_lockref); return dentry; } extern struct dentry *dget_parent(struct dentry *dentry); /** * d_unhashed - is dentry hashed * @dentry: entry to check * * Returns true if the dentry passed is not currently hashed. */ static inline int d_unhashed(const struct dentry *dentry) { return hlist_bl_unhashed(&dentry->d_hash); } static inline int d_unlinked(const struct dentry *dentry) { return d_unhashed(dentry) && !IS_ROOT(dentry); } static inline int cant_mount(const struct dentry *dentry) { return (dentry->d_flags & DCACHE_CANT_MOUNT); } static inline void dont_mount(struct dentry *dentry) { spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_CANT_MOUNT; spin_unlock(&dentry->d_lock); } extern void __d_lookup_unhash_wake(struct dentry *dentry); static inline int d_in_lookup(const struct dentry *dentry) { return dentry->d_flags & DCACHE_PAR_LOOKUP; } static inline void d_lookup_done(struct dentry *dentry) { if (unlikely(d_in_lookup(dentry))) __d_lookup_unhash_wake(dentry); } extern void dput(struct dentry *); static inline bool d_managed(const struct dentry *dentry) { return dentry->d_flags & DCACHE_MANAGED_DENTRY; } static inline bool d_mountpoint(const struct dentry *dentry) { return dentry->d_flags & DCACHE_MOUNTED; } /* * Directory cache entry type accessor functions. */ static inline unsigned __d_entry_type(const struct dentry *dentry) { return dentry->d_flags & DCACHE_ENTRY_TYPE; } static inline bool d_is_miss(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_MISS_TYPE; } static inline bool d_is_whiteout(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_WHITEOUT_TYPE; } static inline bool d_can_lookup(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_DIRECTORY_TYPE; } static inline bool d_is_autodir(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_AUTODIR_TYPE; } static inline bool d_is_dir(const struct dentry *dentry) { return d_can_lookup(dentry) || d_is_autodir(dentry); } static inline bool d_is_symlink(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_SYMLINK_TYPE; } static inline bool d_is_reg(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_REGULAR_TYPE; } static inline bool d_is_special(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_SPECIAL_TYPE; } static inline bool d_is_file(const struct dentry *dentry) { return d_is_reg(dentry) || d_is_special(dentry); } static inline bool d_is_negative(const struct dentry *dentry) { // TODO: check d_is_whiteout(dentry) also. return d_is_miss(dentry); } static inline bool d_flags_negative(unsigned flags) { return (flags & DCACHE_ENTRY_TYPE) == DCACHE_MISS_TYPE; } static inline bool d_is_positive(const struct dentry *dentry) { return !d_is_negative(dentry); } /** * d_really_is_negative - Determine if a dentry is really negative (ignoring fallthroughs) * @dentry: The dentry in question * * Returns true if the dentry represents either an absent name or a name that * doesn't map to an inode (ie. ->d_inode is NULL). The dentry could represent * a true miss, a whiteout that isn't represented by a 0,0 chardev or a * fallthrough marker in an opaque directory. * * Note! (1) This should be used *only* by a filesystem to examine its own * dentries. It should not be used to look at some other filesystem's * dentries. (2) It should also be used in combination with d_inode() to get * the inode. (3) The dentry may have something attached to ->d_lower and the * type field of the flags may be set to something other than miss or whiteout. */ static inline bool d_really_is_negative(const struct dentry *dentry) { return dentry->d_inode == NULL; } /** * d_really_is_positive - Determine if a dentry is really positive (ignoring fallthroughs) * @dentry: The dentry in question * * Returns true if the dentry represents a name that maps to an inode * (ie. ->d_inode is not NULL). The dentry might still represent a whiteout if * that is represented on medium as a 0,0 chardev. * * Note! (1) This should be used *only* by a filesystem to examine its own * dentries. It should not be used to look at some other filesystem's * dentries. (2) It should also be used in combination with d_inode() to get * the inode. */ static inline bool d_really_is_positive(const struct dentry *dentry) { return dentry->d_inode != NULL; } static inline int simple_positive(const struct dentry *dentry) { return d_really_is_positive(dentry) && !d_unhashed(dentry); } unsigned long vfs_pressure_ratio(unsigned long val); /** * d_inode - Get the actual inode of this dentry * @dentry: The dentry to query * * This is the helper normal filesystems should use to get at their own inodes * in their own dentries and ignore the layering superimposed upon them. */ static inline struct inode *d_inode(const struct dentry *dentry) { return dentry->d_inode; } /** * d_inode_rcu - Get the actual inode of this dentry with READ_ONCE() * @dentry: The dentry to query * * This is the helper normal filesystems should use to get at their own inodes * in their own dentries and ignore the layering superimposed upon them. */ static inline struct inode *d_inode_rcu(const struct dentry *dentry) { return READ_ONCE(dentry->d_inode); } /** * d_backing_inode - Get upper or lower inode we should be using * @upper: The upper layer * * This is the helper that should be used to get at the inode that will be used * if this dentry were to be opened as a file. The inode may be on the upper * dentry or it may be on a lower dentry pinned by the upper. * * Normal filesystems should not use this to access their own inodes. */ static inline struct inode *d_backing_inode(const struct dentry *upper) { struct inode *inode = upper->d_inode; return inode; } /** * d_real - Return the real dentry * @dentry: the dentry to query * @type: the type of real dentry (data or metadata) * * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. * * See also: Documentation/filesystems/vfs.rst */ static inline struct dentry *d_real(struct dentry *dentry, enum d_real_type type) { if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) return dentry->d_op->d_real(dentry, type); else return dentry; } /** * d_real_inode - Return the real inode hosting the data * @dentry: The dentry to query * * If dentry is on a union/overlay, then return the underlying, real inode. * Otherwise return d_inode(). */ static inline struct inode *d_real_inode(const struct dentry *dentry) { /* This usage of d_real() results in const dentry */ return d_inode(d_real((struct dentry *) dentry, D_REAL_DATA)); } struct name_snapshot { struct qstr name; union shortname_store inline_name; }; void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); void release_dentry_name_snapshot(struct name_snapshot *); static inline struct dentry *d_first_child(const struct dentry *dentry) { return hlist_entry_safe(dentry->d_children.first, struct dentry, d_sib); } static inline struct dentry *d_next_sibling(const struct dentry *dentry) { return hlist_entry_safe(dentry->d_sib.next, struct dentry, d_sib); } void set_default_d_op(struct super_block *, const struct dentry_operations *); #endif /* __LINUX_DCACHE_H */
88 27 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_RTNH_H #define __NET_RTNH_H #include <linux/rtnetlink.h> #include <net/netlink.h> static inline int rtnh_ok(const struct rtnexthop *rtnh, int remaining) { return remaining >= (int)sizeof(*rtnh) && rtnh->rtnh_len >= sizeof(*rtnh) && rtnh->rtnh_len <= remaining; } static inline struct rtnexthop *rtnh_next(const struct rtnexthop *rtnh, int *remaining) { int totlen = NLA_ALIGN(rtnh->rtnh_len); *remaining -= totlen; return (struct rtnexthop *) ((char *) rtnh + totlen); } static inline struct nlattr *rtnh_attrs(const struct rtnexthop *rtnh) { return (struct nlattr *) ((char *) rtnh + NLA_ALIGN(sizeof(*rtnh))); } static inline int rtnh_attrlen(const struct rtnexthop *rtnh) { return rtnh->rtnh_len - NLA_ALIGN(sizeof(*rtnh)); } #endif
2283 509 4744 4016 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_USER_NAMESPACE_H #define _LINUX_USER_NAMESPACE_H #include <linux/kref.h> #include <linux/nsproxy.h> #include <linux/ns_common.h> #include <linux/rculist_nulls.h> #include <linux/sched.h> #include <linux/workqueue.h> #include <linux/rcuref.h> #include <linux/rwsem.h> #include <linux/sysctl.h> #include <linux/err.h> #define UID_GID_MAP_MAX_BASE_EXTENTS 5 #define UID_GID_MAP_MAX_EXTENTS 340 struct uid_gid_extent { u32 first; u32 lower_first; u32 count; }; struct uid_gid_map { /* 64 bytes -- 1 cache line */ union { struct { struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS]; u32 nr_extents; }; struct { struct uid_gid_extent *forward; struct uid_gid_extent *reverse; }; }; }; #define USERNS_SETGROUPS_ALLOWED 1UL #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED struct ucounts; enum ucount_type { UCOUNT_USER_NAMESPACES, UCOUNT_PID_NAMESPACES, UCOUNT_UTS_NAMESPACES, UCOUNT_IPC_NAMESPACES, UCOUNT_NET_NAMESPACES, UCOUNT_MNT_NAMESPACES, UCOUNT_CGROUP_NAMESPACES, UCOUNT_TIME_NAMESPACES, #ifdef CONFIG_INOTIFY_USER UCOUNT_INOTIFY_INSTANCES, UCOUNT_INOTIFY_WATCHES, #endif #ifdef CONFIG_FANOTIFY UCOUNT_FANOTIFY_GROUPS, UCOUNT_FANOTIFY_MARKS, #endif UCOUNT_COUNTS, }; enum rlimit_type { UCOUNT_RLIMIT_NPROC, UCOUNT_RLIMIT_MSGQUEUE, UCOUNT_RLIMIT_SIGPENDING, UCOUNT_RLIMIT_MEMLOCK, UCOUNT_RLIMIT_COUNTS, }; #if IS_ENABLED(CONFIG_BINFMT_MISC) struct binfmt_misc; #endif struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; struct uid_gid_map projid_map; struct user_namespace *parent; int level; kuid_t owner; kgid_t group; struct ns_common ns; unsigned long flags; /* parent_could_setfcap: true if the creator if this ns had CAP_SETFCAP * in its effective capability set at the child ns creation time. */ bool parent_could_setfcap; #ifdef CONFIG_KEYS /* List of joinable keyrings in this namespace. Modification access of * these pointers is controlled by keyring_sem. Once * user_keyring_register is set, it won't be changed, so it can be * accessed directly with READ_ONCE(). */ struct list_head keyring_name_list; struct key *user_keyring_register; struct rw_semaphore keyring_sem; #endif /* Register of per-UID persistent keyrings for this namespace */ #ifdef CONFIG_PERSISTENT_KEYRINGS struct key *persistent_keyring_register; #endif struct work_struct work; #ifdef CONFIG_SYSCTL struct ctl_table_set set; struct ctl_table_header *sysctls; #endif struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; long rlimit_max[UCOUNT_RLIMIT_COUNTS]; #if IS_ENABLED(CONFIG_BINFMT_MISC) struct binfmt_misc *binfmt_misc; #endif } __randomize_layout; struct ucounts { struct hlist_nulls_node node; struct user_namespace *ns; kuid_t uid; struct rcu_head rcu; rcuref_t count; atomic_long_t ucount[UCOUNT_COUNTS]; atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS]; }; extern struct user_namespace init_user_ns; extern struct ucounts init_ucounts; bool setup_userns_sysctls(struct user_namespace *ns); void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); void put_ucounts(struct ucounts *ucounts); static inline struct ucounts * __must_check get_ucounts(struct ucounts *ucounts) { if (rcuref_get(&ucounts->count)) return ucounts; return NULL; } static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type type) { return atomic_long_read(&ucounts->rlimit[type]); } long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type, bool override_rlimit); void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type); bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max); static inline long get_userns_rlimit_max(struct user_namespace *ns, enum rlimit_type type) { return READ_ONCE(ns->rlimit_max[type]); } static inline void set_userns_rlimit_max(struct user_namespace *ns, enum rlimit_type type, unsigned long max) { ns->rlimit_max[type] = max <= LONG_MAX ? max : LONG_MAX; } #ifdef CONFIG_USER_NS static inline struct user_namespace *to_user_ns(struct ns_common *ns) { return container_of(ns, struct user_namespace, ns); } static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) ns_ref_inc(ns); return ns; } extern int create_user_ns(struct cred *new); extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); extern void __put_user_ns(struct user_namespace *ns); static inline void put_user_ns(struct user_namespace *ns) { if (ns && ns_ref_put(ns)) __put_user_ns(ns); } struct seq_operations; extern const struct seq_operations proc_uid_seq_operations; extern const struct seq_operations proc_gid_seq_operations; extern const struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); extern int proc_setgroups_show(struct seq_file *m, void *v); extern bool userns_may_setgroups(const struct user_namespace *ns); extern bool in_userns(const struct user_namespace *ancestor, const struct user_namespace *child); extern bool current_in_userns(const struct user_namespace *target_ns); struct ns_common *ns_get_owner(struct ns_common *ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { return &init_user_ns; } static inline int create_user_ns(struct cred *new) { return -EINVAL; } static inline int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) { if (unshare_flags & CLONE_NEWUSER) return -EINVAL; return 0; } static inline void put_user_ns(struct user_namespace *ns) { } static inline bool userns_may_setgroups(const struct user_namespace *ns) { return true; } static inline bool in_userns(const struct user_namespace *ancestor, const struct user_namespace *child) { return true; } static inline bool current_in_userns(const struct user_namespace *target_ns) { return true; } static inline struct ns_common *ns_get_owner(struct ns_common *ns) { return ERR_PTR(-EPERM); } #endif #endif /* _LINUX_USER_H */
4 4 6 23 7 7 7 3 4 4 4 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 // SPDX-License-Identifier: GPL-2.0-only /* * LED support for the input layer * * Copyright 2010-2015 Samuel Thibault <samuel.thibault@ens-lyon.org> */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/init.h> #include <linux/leds.h> #include <linux/input.h> #if IS_ENABLED(CONFIG_VT) #define VT_TRIGGER(_name) .trigger = _name #else #define VT_TRIGGER(_name) .trigger = NULL #endif #if IS_ENABLED(CONFIG_SND_CTL_LED) #define AUDIO_TRIGGER(_name) .trigger = _name #else #define AUDIO_TRIGGER(_name) .trigger = NULL #endif static const struct { const char *name; const char *trigger; } input_led_info[LED_CNT] = { [LED_NUML] = { "numlock", VT_TRIGGER("kbd-numlock") }, [LED_CAPSL] = { "capslock", VT_TRIGGER("kbd-capslock") }, [LED_SCROLLL] = { "scrolllock", VT_TRIGGER("kbd-scrolllock") }, [LED_COMPOSE] = { "compose" }, [LED_KANA] = { "kana", VT_TRIGGER("kbd-kanalock") }, [LED_SLEEP] = { "sleep" } , [LED_SUSPEND] = { "suspend" }, [LED_MUTE] = { "mute", AUDIO_TRIGGER("audio-mute") }, [LED_MISC] = { "misc" }, [LED_MAIL] = { "mail" }, [LED_CHARGING] = { "charging" }, }; struct input_led { struct led_classdev cdev; struct input_handle *handle; unsigned int code; /* One of LED_* constants */ }; struct input_leds { struct input_handle handle; unsigned int num_leds; struct input_led leds[] __counted_by(num_leds); }; static enum led_brightness input_leds_brightness_get(struct led_classdev *cdev) { struct input_led *led = container_of(cdev, struct input_led, cdev); struct input_dev *input = led->handle->dev; return test_bit(led->code, input->led) ? cdev->max_brightness : 0; } static void input_leds_brightness_set(struct led_classdev *cdev, enum led_brightness brightness) { struct input_led *led = container_of(cdev, struct input_led, cdev); input_inject_event(led->handle, EV_LED, led->code, !!brightness); } static void input_leds_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) { } static int input_leds_get_count(struct input_dev *dev) { unsigned int led_code; int count = 0; for_each_set_bit(led_code, dev->ledbit, LED_CNT) if (input_led_info[led_code].name) count++; return count; } static int input_leds_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { struct input_leds *leds; struct input_led *led; unsigned int num_leds; unsigned int led_code; int led_no; int error; num_leds = input_leds_get_count(dev); if (!num_leds) return -ENXIO; leds = kzalloc(struct_size(leds, leds, num_leds), GFP_KERNEL); if (!leds) return -ENOMEM; leds->num_leds = num_leds; leds->handle.dev = dev; leds->handle.handler = handler; leds->handle.name = "leds"; leds->handle.private = leds; error = input_register_handle(&leds->handle); if (error) goto err_free_mem; error = input_open_device(&leds->handle); if (error) goto err_unregister_handle; led_no = 0; for_each_set_bit(led_code, dev->ledbit, LED_CNT) { if (!input_led_info[led_code].name) continue; led = &leds->leds[led_no]; led->handle = &leds->handle; led->code = led_code; led->cdev.name = kasprintf(GFP_KERNEL, "%s::%s", dev_name(&dev->dev), input_led_info[led_code].name); if (!led->cdev.name) { error = -ENOMEM; goto err_unregister_leds; } led->cdev.max_brightness = 1; led->cdev.brightness_get = input_leds_brightness_get; led->cdev.brightness_set = input_leds_brightness_set; led->cdev.default_trigger = input_led_info[led_code].trigger; error = led_classdev_register(&dev->dev, &led->cdev); if (error) { dev_err(&dev->dev, "failed to register LED %s: %d\n", led->cdev.name, error); kfree(led->cdev.name); goto err_unregister_leds; } led_no++; } return 0; err_unregister_leds: while (--led_no >= 0) { struct input_led *led = &leds->leds[led_no]; led_classdev_unregister(&led->cdev); kfree(led->cdev.name); } input_close_device(&leds->handle); err_unregister_handle: input_unregister_handle(&leds->handle); err_free_mem: kfree(leds); return error; } static void input_leds_disconnect(struct input_handle *handle) { struct input_leds *leds = handle->private; int i; for (i = 0; i < leds->num_leds; i++) { struct input_led *led = &leds->leds[i]; led_classdev_unregister(&led->cdev); kfree(led->cdev.name); } input_close_device(handle); input_unregister_handle(handle); kfree(leds); } static const struct input_device_id input_leds_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT, .evbit = { BIT_MASK(EV_LED) }, }, { }, }; MODULE_DEVICE_TABLE(input, input_leds_ids); static struct input_handler input_leds_handler = { .event = input_leds_event, .connect = input_leds_connect, .disconnect = input_leds_disconnect, .name = "leds", .id_table = input_leds_ids, }; static int __init input_leds_init(void) { return input_register_handler(&input_leds_handler); } module_init(input_leds_init); static void __exit input_leds_exit(void) { input_unregister_handler(&input_leds_handler); } module_exit(input_leds_exit); MODULE_AUTHOR("Samuel Thibault <samuel.thibault@ens-lyon.org>"); MODULE_AUTHOR("Dmitry Torokhov <dmitry.torokhov@gmail.com>"); MODULE_DESCRIPTION("Input -> LEDs Bridge"); MODULE_LICENSE("GPL v2");
8 1 7 1 599 596 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 /* * Copyright (C) 2016 Red Hat * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Rob Clark <robdclark@gmail.com> */ #include <linux/debugfs.h> #include <linux/dynamic_debug.h> #include <linux/export.h> #include <linux/io.h> #include <linux/moduleparam.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/stdarg.h> #include <drm/drm.h> #include <drm/drm_drv.h> #include <drm/drm_print.h> /* * __drm_debug: Enable debug output. * Bitmask of DRM_UT_x. See include/drm/drm_print.h for details. */ unsigned long __drm_debug; EXPORT_SYMBOL(__drm_debug); MODULE_PARM_DESC(debug, "Enable debug output, where each bit enables a debug category.\n" "\t\tBit 0 (0x01) will enable CORE messages (drm core code)\n" "\t\tBit 1 (0x02) will enable DRIVER messages (drm controller code)\n" "\t\tBit 2 (0x04) will enable KMS messages (modesetting code)\n" "\t\tBit 3 (0x08) will enable PRIME messages (prime code)\n" "\t\tBit 4 (0x10) will enable ATOMIC messages (atomic code)\n" "\t\tBit 5 (0x20) will enable VBL messages (vblank code)\n" "\t\tBit 7 (0x80) will enable LEASE messages (leasing code)\n" "\t\tBit 8 (0x100) will enable DP messages (displayport code)"); #if !defined(CONFIG_DRM_USE_DYNAMIC_DEBUG) module_param_named(debug, __drm_debug, ulong, 0600); #else /* classnames must match vals of enum drm_debug_category */ DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", "DRM_UT_DRIVER", "DRM_UT_KMS", "DRM_UT_PRIME", "DRM_UT_ATOMIC", "DRM_UT_VBL", "DRM_UT_STATE", "DRM_UT_LEASE", "DRM_UT_DP", "DRM_UT_DRMRES"); static struct ddebug_class_param drm_debug_bitmap = { .bits = &__drm_debug, .flags = "p", .map = &drm_debug_classes, }; module_param_cb(debug, &param_ops_dyndbg_classes, &drm_debug_bitmap, 0600); #endif void __drm_puts_coredump(struct drm_printer *p, const char *str) { struct drm_print_iterator *iterator = p->arg; ssize_t len; if (!iterator->remain) return; if (iterator->offset < iterator->start) { ssize_t copy; len = strlen(str); if (iterator->offset + len <= iterator->start) { iterator->offset += len; return; } copy = len - (iterator->start - iterator->offset); if (copy > iterator->remain) copy = iterator->remain; /* Copy out the bit of the string that we need */ if (iterator->data) memcpy(iterator->data, str + (iterator->start - iterator->offset), copy); iterator->offset = iterator->start + copy; iterator->remain -= copy; } else { ssize_t pos = iterator->offset - iterator->start; len = min_t(ssize_t, strlen(str), iterator->remain); if (iterator->data) memcpy(iterator->data + pos, str, len); iterator->offset += len; iterator->remain -= len; } } EXPORT_SYMBOL(__drm_puts_coredump); void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf) { struct drm_print_iterator *iterator = p->arg; size_t len; char *buf; if (!iterator->remain) return; /* Figure out how big the string will be */ len = snprintf(NULL, 0, "%pV", vaf); /* This is the easiest path, we've already advanced beyond the offset */ if (iterator->offset + len <= iterator->start) { iterator->offset += len; return; } /* Then check if we can directly copy into the target buffer */ if ((iterator->offset >= iterator->start) && (len < iterator->remain)) { ssize_t pos = iterator->offset - iterator->start; if (iterator->data) snprintf(((char *) iterator->data) + pos, iterator->remain, "%pV", vaf); iterator->offset += len; iterator->remain -= len; return; } /* * Finally, hit the slow path and make a temporary string to copy over * using _drm_puts_coredump */ buf = kmalloc(len + 1, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (!buf) return; snprintf(buf, len + 1, "%pV", vaf); __drm_puts_coredump(p, (const char *) buf); kfree(buf); } EXPORT_SYMBOL(__drm_printfn_coredump); void __drm_puts_seq_file(struct drm_printer *p, const char *str) { seq_puts(p->arg, str); } EXPORT_SYMBOL(__drm_puts_seq_file); void __drm_printfn_seq_file(struct drm_printer *p, struct va_format *vaf) { seq_printf(p->arg, "%pV", vaf); } EXPORT_SYMBOL(__drm_printfn_seq_file); static void __drm_dev_vprintk(const struct device *dev, const char *level, const void *origin, const char *prefix, struct va_format *vaf) { const char *prefix_pad = prefix ? " " : ""; if (!prefix) prefix = ""; if (dev) { if (origin) dev_printk(level, dev, "[" DRM_NAME ":%ps]%s%s %pV", origin, prefix_pad, prefix, vaf); else dev_printk(level, dev, "[" DRM_NAME "]%s%s %pV", prefix_pad, prefix, vaf); } else { if (origin) printk("%s" "[" DRM_NAME ":%ps]%s%s %pV", level, origin, prefix_pad, prefix, vaf); else printk("%s" "[" DRM_NAME "]%s%s %pV", level, prefix_pad, prefix, vaf); } } void __drm_printfn_info(struct drm_printer *p, struct va_format *vaf) { dev_info(p->arg, "[" DRM_NAME "] %pV", vaf); } EXPORT_SYMBOL(__drm_printfn_info); void __drm_printfn_dbg(struct drm_printer *p, struct va_format *vaf) { const struct drm_device *drm = p->arg; const struct device *dev = drm ? drm->dev : NULL; enum drm_debug_category category = p->category; if (!__drm_debug_enabled(category)) return; __drm_dev_vprintk(dev, KERN_DEBUG, p->origin, p->prefix, vaf); } EXPORT_SYMBOL(__drm_printfn_dbg); void __drm_printfn_err(struct drm_printer *p, struct va_format *vaf) { struct drm_device *drm = p->arg; if (p->prefix) drm_err(drm, "%s %pV", p->prefix, vaf); else drm_err(drm, "%pV", vaf); } EXPORT_SYMBOL(__drm_printfn_err); void __drm_printfn_line(struct drm_printer *p, struct va_format *vaf) { unsigned int counter = ++p->line.counter; const char *prefix = p->prefix ?: ""; const char *pad = p->prefix ? " " : ""; if (p->line.series) drm_printf(p->arg, "%s%s%u.%u: %pV", prefix, pad, p->line.series, counter, vaf); else drm_printf(p->arg, "%s%s%u: %pV", prefix, pad, counter, vaf); } EXPORT_SYMBOL(__drm_printfn_line); /** * drm_puts - print a const string to a &drm_printer stream * @p: the &drm printer * @str: const string * * Allow &drm_printer types that have a constant string * option to use it. */ void drm_puts(struct drm_printer *p, const char *str) { if (p->puts) p->puts(p, str); else drm_printf(p, "%s", str); } EXPORT_SYMBOL(drm_puts); /** * drm_printf - print to a &drm_printer stream * @p: the &drm_printer * @f: format string */ void drm_printf(struct drm_printer *p, const char *f, ...) { va_list args; va_start(args, f); drm_vprintf(p, f, &args); va_end(args); } EXPORT_SYMBOL(drm_printf); /** * drm_print_bits - print bits to a &drm_printer stream * * Print bits (in flag fields for example) in human readable form. * * @p: the &drm_printer * @value: field value. * @bits: Array with bit names. * @nbits: Size of bit names array. */ void drm_print_bits(struct drm_printer *p, unsigned long value, const char * const bits[], unsigned int nbits) { bool first = true; unsigned int i; if (WARN_ON_ONCE(nbits > BITS_PER_TYPE(value))) nbits = BITS_PER_TYPE(value); for_each_set_bit(i, &value, nbits) { if (WARN_ON_ONCE(!bits[i])) continue; drm_printf(p, "%s%s", first ? "" : ",", bits[i]); first = false; } if (first) drm_printf(p, "(none)"); } EXPORT_SYMBOL(drm_print_bits); void drm_dev_printk(const struct device *dev, const char *level, const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; __drm_dev_vprintk(dev, level, __builtin_return_address(0), NULL, &vaf); va_end(args); } EXPORT_SYMBOL(drm_dev_printk); void __drm_dev_dbg(struct _ddebug *desc, const struct device *dev, enum drm_debug_category category, const char *format, ...) { struct va_format vaf; va_list args; if (!__drm_debug_enabled(category)) return; /* we know we are printing for either syslog, tracefs, or both */ va_start(args, format); vaf.fmt = format; vaf.va = &args; __drm_dev_vprintk(dev, KERN_DEBUG, __builtin_return_address(0), NULL, &vaf); va_end(args); } EXPORT_SYMBOL(__drm_dev_dbg); void __drm_err(const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; __drm_dev_vprintk(NULL, KERN_ERR, __builtin_return_address(0), "*ERROR*", &vaf); va_end(args); } EXPORT_SYMBOL(__drm_err); /** * drm_print_regset32 - print the contents of registers to a * &drm_printer stream. * * @p: the &drm printer * @regset: the list of registers to print. * * Often in driver debug, it's useful to be able to either capture the * contents of registers in the steady state using debugfs or at * specific points during operation. This lets the driver have a * single list of registers for both. */ void drm_print_regset32(struct drm_printer *p, struct debugfs_regset32 *regset) { int namelen = 0; int i; for (i = 0; i < regset->nregs; i++) namelen = max(namelen, (int)strlen(regset->regs[i].name)); for (i = 0; i < regset->nregs; i++) { drm_printf(p, "%*s = 0x%08x\n", namelen, regset->regs[i].name, readl(regset->base + regset->regs[i].offset)); } } EXPORT_SYMBOL(drm_print_regset32); /** * drm_print_hex_dump - print a hex dump to a &drm_printer stream * @p: The &drm_printer * @prefix: Prefix for each line, may be NULL for no prefix * @buf: Buffer to dump * @len: Length of buffer * * Print hex dump to &drm_printer, with 16 space-separated hex bytes per line, * optionally with a prefix on each line. No separator is added after prefix. */ void drm_print_hex_dump(struct drm_printer *p, const char *prefix, const u8 *buf, size_t len) { int i; for (i = 0; i < len; i += 16) { int bytes_per_line = min(16, len - i); drm_printf(p, "%s%*ph\n", prefix ?: "", bytes_per_line, buf + i); } } EXPORT_SYMBOL(drm_print_hex_dump);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Key-agreement Protocol Primitives (KPP) * * Copyright (c) 2016, Intel Corporation * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com> */ #ifndef _CRYPTO_KPP_ #define _CRYPTO_KPP_ #include <linux/atomic.h> #include <linux/container_of.h> #include <linux/crypto.h> #include <linux/slab.h> /** * struct kpp_request * * @base: Common attributes for async crypto requests * @src: Source data * @dst: Destination data * @src_len: Size of the input buffer * @dst_len: Size of the output buffer. It needs to be at least * as big as the expected result depending on the operation * After operation it will be updated with the actual size of the * result. In case of error where the dst sgl size was insufficient, * it will be updated to the size required for the operation. * @__ctx: Start of private context data */ struct kpp_request { struct crypto_async_request base; struct scatterlist *src; struct scatterlist *dst; unsigned int src_len; unsigned int dst_len; void *__ctx[] CRYPTO_MINALIGN_ATTR; }; /** * struct crypto_kpp - user-instantiated object which encapsulate * algorithms and core processing logic * * @reqsize: Request context size required by algorithm * implementation * @base: Common crypto API algorithm data structure */ struct crypto_kpp { unsigned int reqsize; struct crypto_tfm base; }; /** * struct kpp_alg - generic key-agreement protocol primitives * * @set_secret: Function invokes the protocol specific function to * store the secret private key along with parameters. * The implementation knows how to decode the buffer * @generate_public_key: Function generate the public key to be sent to the * counterpart. In case of error, where output is not big * enough req->dst_len will be updated to the size * required * @compute_shared_secret: Function compute the shared secret as defined by * the algorithm. The result is given back to the user. * In case of error, where output is not big enough, * req->dst_len will be updated to the size required * @max_size: Function returns the size of the output buffer * @init: Initialize the object. This is called only once at * instantiation time. In case the cryptographic hardware * needs to be initialized. Software fallback should be * put in place here. * @exit: Undo everything @init did. * * @base: Common crypto API algorithm data structure */ struct kpp_alg { int (*set_secret)(struct crypto_kpp *tfm, const void *buffer, unsigned int len); int (*generate_public_key)(struct kpp_request *req); int (*compute_shared_secret)(struct kpp_request *req); unsigned int (*max_size)(struct crypto_kpp *tfm); int (*init)(struct crypto_kpp *tfm); void (*exit)(struct crypto_kpp *tfm); struct crypto_alg base; }; /** * DOC: Generic Key-agreement Protocol Primitives API * * The KPP API is used with the algorithm type * CRYPTO_ALG_TYPE_KPP (listed as type "kpp" in /proc/crypto) */ /** * crypto_alloc_kpp() - allocate KPP tfm handle * @alg_name: is the name of the kpp algorithm (e.g. "dh", "ecdh") * @type: specifies the type of the algorithm * @mask: specifies the mask for the algorithm * * Allocate a handle for kpp algorithm. The returned struct crypto_kpp * is required for any following API invocation * * Return: allocated handle in case of success; IS_ERR() is true in case of * an error, PTR_ERR() returns the error code. */ struct crypto_kpp *crypto_alloc_kpp(const char *alg_name, u32 type, u32 mask); int crypto_has_kpp(const char *alg_name, u32 type, u32 mask); static inline struct crypto_tfm *crypto_kpp_tfm(struct crypto_kpp *tfm) { return &tfm->base; } static inline struct kpp_alg *__crypto_kpp_alg(struct crypto_alg *alg) { return container_of(alg, struct kpp_alg, base); } static inline struct crypto_kpp *__crypto_kpp_tfm(struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_kpp, base); } static inline struct kpp_alg *crypto_kpp_alg(struct crypto_kpp *tfm) { return __crypto_kpp_alg(crypto_kpp_tfm(tfm)->__crt_alg); } static inline unsigned int crypto_kpp_reqsize(struct crypto_kpp *tfm) { return tfm->reqsize; } static inline void kpp_request_set_tfm(struct kpp_request *req, struct crypto_kpp *tfm) { req->base.tfm = crypto_kpp_tfm(tfm); } static inline struct crypto_kpp *crypto_kpp_reqtfm(struct kpp_request *req) { return __crypto_kpp_tfm(req->base.tfm); } static inline u32 crypto_kpp_get_flags(struct crypto_kpp *tfm) { return crypto_tfm_get_flags(crypto_kpp_tfm(tfm)); } static inline void crypto_kpp_set_flags(struct crypto_kpp *tfm, u32 flags) { crypto_tfm_set_flags(crypto_kpp_tfm(tfm), flags); } /** * crypto_free_kpp() - free KPP tfm handle * * @tfm: KPP tfm handle allocated with crypto_alloc_kpp() * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_kpp(struct crypto_kpp *tfm) { crypto_destroy_tfm(tfm, crypto_kpp_tfm(tfm)); } /** * kpp_request_alloc() - allocates kpp request * * @tfm: KPP tfm handle allocated with crypto_alloc_kpp() * @gfp: allocation flags * * Return: allocated handle in case of success or NULL in case of an error. */ static inline struct kpp_request *kpp_request_alloc(struct crypto_kpp *tfm, gfp_t gfp) { struct kpp_request *req; req = kmalloc(sizeof(*req) + crypto_kpp_reqsize(tfm), gfp); if (likely(req)) kpp_request_set_tfm(req, tfm); return req; } /** * kpp_request_free() - zeroize and free kpp request * * @req: request to free */ static inline void kpp_request_free(struct kpp_request *req) { kfree_sensitive(req); } /** * kpp_request_set_callback() - Sets an asynchronous callback. * * Callback will be called when an asynchronous operation on a given * request is finished. * * @req: request that the callback will be set for * @flgs: specify for instance if the operation may backlog * @cmpl: callback which will be called * @data: private data used by the caller */ static inline void kpp_request_set_callback(struct kpp_request *req, u32 flgs, crypto_completion_t cmpl, void *data) { req->base.complete = cmpl; req->base.data = data; req->base.flags = flgs; } /** * kpp_request_set_input() - Sets input buffer * * Sets parameters required by generate_public_key * * @req: kpp request * @input: ptr to input scatter list * @input_len: size of the input scatter list */ static inline void kpp_request_set_input(struct kpp_request *req, struct scatterlist *input, unsigned int input_len) { req->src = input; req->src_len = input_len; } /** * kpp_request_set_output() - Sets output buffer * * Sets parameters required by kpp operation * * @req: kpp request * @output: ptr to output scatter list * @output_len: size of the output scatter list */ static inline void kpp_request_set_output(struct kpp_request *req, struct scatterlist *output, unsigned int output_len) { req->dst = output; req->dst_len = output_len; } enum { CRYPTO_KPP_SECRET_TYPE_UNKNOWN, CRYPTO_KPP_SECRET_TYPE_DH, CRYPTO_KPP_SECRET_TYPE_ECDH, }; /** * struct kpp_secret - small header for packing secret buffer * * @type: define type of secret. Each kpp type will define its own * @len: specify the len of the secret, include the header, that * follows the struct */ struct kpp_secret { unsigned short type; unsigned short len; }; /** * crypto_kpp_set_secret() - Invoke kpp operation * * Function invokes the specific kpp operation for a given alg. * * @tfm: tfm handle * @buffer: Buffer holding the packet representation of the private * key. The structure of the packet key depends on the particular * KPP implementation. Packing and unpacking helpers are provided * for ECDH and DH (see the respective header files for those * implementations). * @len: Length of the packet private key buffer. * * Return: zero on success; error code in case of error */ static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm, const void *buffer, unsigned int len) { return crypto_kpp_alg(tfm)->set_secret(tfm, buffer, len); } /** * crypto_kpp_generate_public_key() - Invoke kpp operation * * Function invokes the specific kpp operation for generating the public part * for a given kpp algorithm. * * To generate a private key, the caller should use a random number generator. * The output of the requested length serves as the private key. * * @req: kpp key request * * Return: zero on success; error code in case of error */ static inline int crypto_kpp_generate_public_key(struct kpp_request *req) { struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); return crypto_kpp_alg(tfm)->generate_public_key(req); } /** * crypto_kpp_compute_shared_secret() - Invoke kpp operation * * Function invokes the specific kpp operation for computing the shared secret * for a given kpp algorithm. * * @req: kpp key request * * Return: zero on success; error code in case of error */ static inline int crypto_kpp_compute_shared_secret(struct kpp_request *req) { struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); return crypto_kpp_alg(tfm)->compute_shared_secret(req); } /** * crypto_kpp_maxsize() - Get len for output buffer * * Function returns the output buffer size required for a given key. * Function assumes that the key is already set in the transformation. If this * function is called without a setkey or with a failed setkey, you will end up * in a NULL dereference. * * @tfm: KPP tfm handle allocated with crypto_alloc_kpp() */ static inline unsigned int crypto_kpp_maxsize(struct crypto_kpp *tfm) { struct kpp_alg *alg = crypto_kpp_alg(tfm); return alg->max_size(tfm); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. */ #ifndef __RGRP_DOT_H__ #define __RGRP_DOT_H__ #include <linux/slab.h> #include <linux/uaccess.h> /* Since each block in the file system is represented by two bits in the * bitmap, one 64-bit word in the bitmap will represent 32 blocks. * By reserving 32 blocks at a time, we can optimize / shortcut how we search * through the bitmaps by looking a word at a time. */ #define RGRP_RSRV_MINBLKS 32 #define RGRP_RSRV_ADDBLKS 64 struct gfs2_rgrpd; struct gfs2_sbd; struct gfs2_holder; void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact); struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); int gfs2_rindex_update(struct gfs2_sbd *sdp); void gfs2_free_clones(struct gfs2_rgrpd *rgd); int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl); void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd); struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); #define GFS2_AF_ORLOV 1 int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap); void gfs2_inplace_release(struct gfs2_inode *ip); int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode); void gfs2_rs_deltree(struct gfs2_blkreserv *rs); void gfs2_rs_delete(struct gfs2_inode *ip); void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen, int meta); void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen); void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); void gfs2_unlink_di(struct inode *inode); int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type); struct gfs2_rgrp_list { unsigned int rl_rgrps; unsigned int rl_space; struct gfs2_rgrpd **rl_rgd; struct gfs2_holder *rl_ghs; }; void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, u64 block); void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, u16 flags); void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); u64 gfs2_ri_total(struct gfs2_sbd *sdp); void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, const char *fs_id_buf); int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, struct buffer_head *bh, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); int gfs2_fitrim(struct file *filp, void __user *argp); /* This is how to tell if a reservation is in the rgrp tree: */ static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs) { return !RB_EMPTY_NODE(&rs->rs_node); } static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) { u64 first = rgd->rd_data0; u64 last = first + rgd->rd_data; return first <= block && block < last; } void check_and_update_goal(struct gfs2_inode *ip); void rgrp_lock_local(struct gfs2_rgrpd *rgd); void rgrp_unlock_local(struct gfs2_rgrpd *rgd); #endif /* __RGRP_DOT_H__ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 // SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ #ifndef __XFS_REFCOUNT_ITEM_H__ #define __XFS_REFCOUNT_ITEM_H__ /* * There are (currently) two pairs of refcount btree redo item types: * increase and decrease. The log items for these are CUI (refcount * update intent) and CUD (refcount update done). The redo item type * is encoded in the flags field of each xfs_map_extent. * * *I items should be recorded in the *first* of a series of rolled * transactions, and the *D items should be recorded in the same * transaction that records the associated refcountbt updates. * * Should the system crash after the commit of the first transaction * but before the commit of the final transaction in a series, log * recovery will use the redo information recorded by the intent items * to replay the refcountbt metadata updates. */ /* kernel only CUI/CUD definitions */ struct xfs_mount; struct kmem_cache; /* * Max number of extents in fast allocation path. */ #define XFS_CUI_MAX_FAST_EXTENTS 16 /* * This is the "refcount update intent" log item. It is used to log * the fact that some reverse mappings need to change. It is used in * conjunction with the "refcount update done" log item described * below. * * These log items follow the same rules as struct xfs_efi_log_item; * see the comments about that structure (in xfs_extfree_item.h) for * more details. */ struct xfs_cui_log_item { struct xfs_log_item cui_item; atomic_t cui_refcount; atomic_t cui_next_extent; struct xfs_cui_log_format cui_format; }; static inline size_t xfs_cui_log_item_sizeof( unsigned int nr) { return offsetof(struct xfs_cui_log_item, cui_format) + xfs_cui_log_format_sizeof(nr); } /* * This is the "refcount update done" log item. It is used to log the * fact that some refcountbt updates mentioned in an earlier cui item * have been performed. */ struct xfs_cud_log_item { struct xfs_log_item cud_item; struct xfs_cui_log_item *cud_cuip; struct xfs_cud_log_format cud_format; }; extern struct kmem_cache *xfs_cui_cache; extern struct kmem_cache *xfs_cud_cache; struct xfs_refcount_intent; void xfs_refcount_defer_add(struct xfs_trans *tp, struct xfs_refcount_intent *ri); unsigned int xfs_cui_log_space(unsigned int nr); unsigned int xfs_cud_log_space(void); #endif /* __XFS_REFCOUNT_ITEM_H__ */
2 3 1 2 1 1 4 4 4 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 // SPDX-License-Identifier: GPL-2.0-only /* * QNX4 file system, Linux implementation. * * Version : 0.2.1 * * Using parts of the xiafs filesystem. * * History : * * 01-06-1998 by Richard Frowijn : first release. * 20-06-1998 by Frank Denis : Linux 2.1.99+ support, boot signature, misc. * 30-06-1998 by Frank Denis : first step to write inodes. */ #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/highuid.h> #include <linux/pagemap.h> #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/statfs.h> #include <linux/fs_context.h> #include "qnx4.h" #define QNX4_VERSION 4 #define QNX4_BMNAME ".bitmap" static const struct super_operations qnx4_sops; static struct inode *qnx4_alloc_inode(struct super_block *sb); static void qnx4_free_inode(struct inode *inode); static int qnx4_statfs(struct dentry *, struct kstatfs *); static int qnx4_get_tree(struct fs_context *fc); static const struct super_operations qnx4_sops = { .alloc_inode = qnx4_alloc_inode, .free_inode = qnx4_free_inode, .statfs = qnx4_statfs, }; static int qnx4_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; struct qnx4_sb_info *qs; sync_filesystem(sb); qs = qnx4_sb(sb); qs->Version = QNX4_VERSION; fc->sb_flags |= SB_RDONLY; return 0; } static const struct fs_context_operations qnx4_context_opts = { .get_tree = qnx4_get_tree, .reconfigure = qnx4_reconfigure, }; static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_head *bh, int create ) { unsigned long phys; QNX4DEBUG((KERN_INFO "qnx4: qnx4_get_block inode=[%ld] iblock=[%ld]\n",inode->i_ino,iblock)); phys = qnx4_block_map( inode, iblock ); if ( phys ) { // logical block is before EOF map_bh(bh, inode->i_sb, phys); } return 0; } static inline u32 try_extent(qnx4_xtnt_t *extent, u32 *offset) { u32 size = le32_to_cpu(extent->xtnt_size); if (*offset < size) return le32_to_cpu(extent->xtnt_blk) + *offset - 1; *offset -= size; return 0; } unsigned long qnx4_block_map( struct inode *inode, long iblock ) { int ix; long i_xblk; struct buffer_head *bh = NULL; struct qnx4_xblk *xblk = NULL; struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode); u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts); u32 offset = iblock; u32 block = try_extent(&qnx4_inode->di_first_xtnt, &offset); if (block) { // iblock is in the first extent. This is easy. } else { // iblock is beyond first extent. We have to follow the extent chain. i_xblk = le32_to_cpu(qnx4_inode->di_xblk); ix = 0; while ( --nxtnt > 0 ) { if ( ix == 0 ) { // read next xtnt block. bh = sb_bread(inode->i_sb, i_xblk - 1); if ( !bh ) { QNX4DEBUG((KERN_ERR "qnx4: I/O error reading xtnt block [%ld])\n", i_xblk - 1)); return -EIO; } xblk = (struct qnx4_xblk*)bh->b_data; if ( memcmp( xblk->xblk_signature, "IamXblk", 7 ) ) { QNX4DEBUG((KERN_ERR "qnx4: block at %ld is not a valid xtnt\n", qnx4_inode->i_xblk)); return -EIO; } } block = try_extent(&xblk->xblk_xtnts[ix], &offset); if (block) { // got it! break; } if ( ++ix >= xblk->xblk_num_xtnts ) { i_xblk = le32_to_cpu(xblk->xblk_next_xblk); ix = 0; brelse( bh ); bh = NULL; } } if ( bh ) brelse( bh ); } QNX4DEBUG((KERN_INFO "qnx4: mapping block %ld of inode %ld = %ld\n",iblock,inode->i_ino,block)); return block; } static int qnx4_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); buf->f_type = sb->s_magic; buf->f_bsize = sb->s_blocksize; buf->f_blocks = le32_to_cpu(qnx4_sb(sb)->BitMap->di_size) * 8; buf->f_bfree = qnx4_count_free_blocks(sb); buf->f_bavail = buf->f_bfree; buf->f_namelen = QNX4_NAME_MAX; buf->f_fsid = u64_to_fsid(id); return 0; } /* * Check the root directory of the filesystem to make sure * it really _is_ a qnx4 filesystem, and to check the size * of the directory entry. */ static const char *qnx4_checkroot(struct super_block *sb, struct qnx4_super_block *s) { struct buffer_head *bh; struct qnx4_inode_entry *rootdir; int rd, rl; int i, j; if (s->RootDir.di_fname[0] != '/' || s->RootDir.di_fname[1] != '\0') return "no qnx4 filesystem (no root dir)."; QNX4DEBUG((KERN_NOTICE "QNX4 filesystem found on dev %s.\n", sb->s_id)); rd = le32_to_cpu(s->RootDir.di_first_xtnt.xtnt_blk) - 1; rl = le32_to_cpu(s->RootDir.di_first_xtnt.xtnt_size); for (j = 0; j < rl; j++) { bh = sb_bread(sb, rd + j); /* root dir, first block */ if (bh == NULL) return "unable to read root entry."; rootdir = (struct qnx4_inode_entry *) bh->b_data; for (i = 0; i < QNX4_INODES_PER_BLOCK; i++, rootdir++) { QNX4DEBUG((KERN_INFO "rootdir entry found : [%s]\n", rootdir->di_fname)); if (strcmp(rootdir->di_fname, QNX4_BMNAME) != 0) continue; qnx4_sb(sb)->BitMap = kmemdup(rootdir, sizeof(struct qnx4_inode_entry), GFP_KERNEL); brelse(bh); if (!qnx4_sb(sb)->BitMap) return "not enough memory for bitmap inode"; /* keep bitmap inode known */ return NULL; } brelse(bh); } return "bitmap file not found."; } static int qnx4_fill_super(struct super_block *s, struct fs_context *fc) { struct buffer_head *bh; struct inode *root; const char *errmsg; struct qnx4_sb_info *qs; int silent = fc->sb_flags & SB_SILENT; qs = kzalloc(sizeof(struct qnx4_sb_info), GFP_KERNEL); if (!qs) return -ENOMEM; s->s_fs_info = qs; sb_set_blocksize(s, QNX4_BLOCK_SIZE); s->s_op = &qnx4_sops; s->s_magic = QNX4_SUPER_MAGIC; s->s_flags |= SB_RDONLY; /* Yup, read-only yet */ s->s_time_min = 0; s->s_time_max = U32_MAX; /* Check the superblock signature. Since the qnx4 code is dangerous, we should leave as quickly as possible if we don't belong here... */ bh = sb_bread(s, 1); if (!bh) { printk(KERN_ERR "qnx4: unable to read the superblock\n"); return -EINVAL; } /* check before allocating dentries, inodes, .. */ errmsg = qnx4_checkroot(s, (struct qnx4_super_block *) bh->b_data); brelse(bh); if (errmsg != NULL) { if (!silent) printk(KERN_ERR "qnx4: %s\n", errmsg); return -EINVAL; } /* does root not have inode number QNX4_ROOT_INO ?? */ root = qnx4_iget(s, QNX4_ROOT_INO * QNX4_INODES_PER_BLOCK); if (IS_ERR(root)) { printk(KERN_ERR "qnx4: get inode failed\n"); return PTR_ERR(root); } s->s_root = d_make_root(root); if (s->s_root == NULL) return -ENOMEM; return 0; } static int qnx4_get_tree(struct fs_context *fc) { return get_tree_bdev(fc, qnx4_fill_super); } static int qnx4_init_fs_context(struct fs_context *fc) { fc->ops = &qnx4_context_opts; return 0; } static void qnx4_kill_sb(struct super_block *sb) { struct qnx4_sb_info *qs = qnx4_sb(sb); kill_block_super(sb); if (qs) { kfree(qs->BitMap); kfree(qs); } } static int qnx4_read_folio(struct file *file, struct folio *folio) { return block_read_full_folio(folio, qnx4_get_block); } static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,qnx4_get_block); } static const struct address_space_operations qnx4_aops = { .read_folio = qnx4_read_folio, .bmap = qnx4_bmap }; struct inode *qnx4_iget(struct super_block *sb, unsigned long ino) { struct buffer_head *bh; struct qnx4_inode_entry *raw_inode; int block; struct qnx4_inode_entry *qnx4_inode; struct inode *inode; inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; qnx4_inode = qnx4_raw_inode(inode); inode->i_mode = 0; QNX4DEBUG((KERN_INFO "reading inode : [%d]\n", ino)); if (!ino) { printk(KERN_ERR "qnx4: bad inode number on dev %s: %lu is " "out of range\n", sb->s_id, ino); iget_failed(inode); return ERR_PTR(-EIO); } block = ino / QNX4_INODES_PER_BLOCK; if (!(bh = sb_bread(sb, block))) { printk(KERN_ERR "qnx4: major problem: unable to read inode from dev " "%s\n", sb->s_id); iget_failed(inode); return ERR_PTR(-EIO); } raw_inode = ((struct qnx4_inode_entry *) bh->b_data) + (ino % QNX4_INODES_PER_BLOCK); inode->i_mode = le16_to_cpu(raw_inode->di_mode); i_uid_write(inode, (uid_t)le16_to_cpu(raw_inode->di_uid)); i_gid_write(inode, (gid_t)le16_to_cpu(raw_inode->di_gid)); set_nlink(inode, le16_to_cpu(raw_inode->di_nlink)); inode->i_size = le32_to_cpu(raw_inode->di_size); inode_set_mtime(inode, le32_to_cpu(raw_inode->di_mtime), 0); inode_set_atime(inode, le32_to_cpu(raw_inode->di_atime), 0); inode_set_ctime(inode, le32_to_cpu(raw_inode->di_ctime), 0); inode->i_blocks = le32_to_cpu(raw_inode->di_first_xtnt.xtnt_size); memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE); if (S_ISREG(inode->i_mode)) { inode->i_fop = &generic_ro_fops; inode->i_mapping->a_ops = &qnx4_aops; qnx4_i(inode)->mmu_private = inode->i_size; } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &qnx4_dir_inode_operations; inode->i_fop = &qnx4_dir_operations; } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &page_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &qnx4_aops; qnx4_i(inode)->mmu_private = inode->i_size; } else { printk(KERN_ERR "qnx4: bad inode %lu on dev %s\n", ino, sb->s_id); iget_failed(inode); brelse(bh); return ERR_PTR(-EIO); } brelse(bh); unlock_new_inode(inode); return inode; } static struct kmem_cache *qnx4_inode_cachep; static struct inode *qnx4_alloc_inode(struct super_block *sb) { struct qnx4_inode_info *ei; ei = alloc_inode_sb(sb, qnx4_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; } static void qnx4_free_inode(struct inode *inode) { kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); } static void init_once(void *foo) { struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) { qnx4_inode_cachep = kmem_cache_create("qnx4_inode_cache", sizeof(struct qnx4_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_ACCOUNT), init_once); if (qnx4_inode_cachep == NULL) return -ENOMEM; return 0; } static void destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(qnx4_inode_cachep); } static struct file_system_type qnx4_fs_type = { .owner = THIS_MODULE, .name = "qnx4", .kill_sb = qnx4_kill_sb, .fs_flags = FS_REQUIRES_DEV, .init_fs_context = qnx4_init_fs_context, }; MODULE_ALIAS_FS("qnx4"); static int __init init_qnx4_fs(void) { int err; err = init_inodecache(); if (err) return err; err = register_filesystem(&qnx4_fs_type); if (err) { destroy_inodecache(); return err; } printk(KERN_INFO "QNX4 filesystem 0.2.3 registered.\n"); return 0; } static void __exit exit_qnx4_fs(void) { unregister_filesystem(&qnx4_fs_type); destroy_inodecache(); } module_init(init_qnx4_fs) module_exit(exit_qnx4_fs) MODULE_DESCRIPTION("QNX4 file system"); MODULE_LICENSE("GPL");
9 9 9 9 9 9 2 1 25 1 29 30 26 5 6 29 15 59 1 33 1 4 8 2 8 2 4 2 2 2 2 2 3 1 1 2 1 14 1 12 1 1 2 1 1 3 13 13 13 12 9 10 3 1 6 9 9 1 109 3 2 1 109 106 1 91 16 101 10 106 110 109 1 109 104 10 2 8 6 4 2 1 2 2 6 5 2 4 6 6 8 23 137 137 5 22 110 1 83 65 14 77 8 8 14 14 14 20 7 14 4 4 6 4 6 1 6 1 1 5 1 46 1 3 39 8 8 14 9 3 26 19 9 4 5 1 1 3 1 1 51 3 26 19 6 76 65 4 8 7 8 13 90 81 81 44 35 8 1 32 6 24 3 21 6 12 30 30 25 5 28 28 2 17 2 19 4 7 14 1 8 5 12 1 9 5 9 152 231 1 1 1 1 1 1 1 1 1 3 1 1 220 3 1 2 50 50 50 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT) * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * * Changes: * Roger Venning <r.venning@telstra.com>: 6to4 support * Nate Thompson <nate@thebog.net>: 6to4 support * Fred Templin <fred.l.templin@boeing.com>: isatap support */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/icmp.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/init.h> #include <linux/netfilter_ipv4.h> #include <linux/if_ether.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> #include <net/ndisc.h> #include <net/addrconf.h> #include <net/ip.h> #include <net/udp.h> #include <net/icmp.h> #include <net/ip_tunnels.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/dsfield.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/netdev_lock.h> #include <net/inet_dscp.h> /* This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c For comments look at net/ipv4/ip_gre.c --ANK */ #define IP6_SIT_HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static int ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static void ipip6_dev_free(struct net_device *dev); static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, __be32 *v4dst); static struct rtnl_link_ops sit_link_ops __read_mostly; static unsigned int sit_net_id __read_mostly; struct sit_net { struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE]; struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE]; struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE]; struct ip_tunnel __rcu *tunnels_wc[1]; struct ip_tunnel __rcu **tunnels[4]; struct net_device *fb_tunnel_dev; }; static inline struct sit_net *dev_to_sit_net(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); return net_generic(t->net, sit_net_id); } /* * Must be invoked with rcu_read_lock */ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, struct net_device *dev, __be32 remote, __be32 local, int sifindex) { unsigned int h0 = HASH(remote); unsigned int h1 = HASH(local); struct ip_tunnel *t; struct sit_net *sitn = net_generic(net, sit_net_id); int ifindex = dev ? dev->ifindex : 0; for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && (!dev || !t->parms.link || ifindex == t->parms.link || sifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) { if (remote == t->parms.iph.daddr && (!dev || !t->parms.link || ifindex == t->parms.link || sifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) { if (local == t->parms.iph.saddr && (!dev || !t->parms.link || ifindex == t->parms.link || sifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } t = rcu_dereference(sitn->tunnels_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; return NULL; } static struct ip_tunnel __rcu ** __ipip6_bucket(struct sit_net *sitn, struct ip_tunnel_parm_kern *parms) { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; unsigned int h = 0; int prio = 0; if (remote) { prio |= 2; h ^= HASH(remote); } if (local) { prio |= 1; h ^= HASH(local); } return &sitn->tunnels[prio][h]; } static inline struct ip_tunnel __rcu **ipip6_bucket(struct sit_net *sitn, struct ip_tunnel *t) { return __ipip6_bucket(sitn, &t->parms); } static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) { struct ip_tunnel __rcu **tp; struct ip_tunnel *iter; for (tp = ipip6_bucket(sitn, t); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { rcu_assign_pointer(*tp, t->next); break; } } } static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) { struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t); rcu_assign_pointer(t->next, rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) { #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel *t = netdev_priv(dev); if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) { ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0); t->ip6rd.relay_prefix = 0; t->ip6rd.prefixlen = 16; t->ip6rd.relay_prefixlen = 0; } else { struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev); memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd)); } #endif } static int ipip6_tunnel_create(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); struct sit_net *sitn = net_generic(t->net, sit_net_id); int err; __dev_addr_set(dev, &t->parms.iph.saddr, 4); memcpy(dev->broadcast, &t->parms.iph.daddr, 4); if (test_bit(IP_TUNNEL_SIT_ISATAP_BIT, t->parms.i_flags)) dev->priv_flags |= IFF_ISATAP; dev->rtnl_link_ops = &sit_link_ops; err = register_netdevice(dev); if (err < 0) goto out; ipip6_tunnel_clone_6rd(dev, sitn); ipip6_tunnel_link(sitn, t); return 0; out: return err; } static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, struct ip_tunnel_parm_kern *parms, int create) { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; struct ip_tunnel *t, *nt; struct ip_tunnel __rcu **tp; struct net_device *dev; char name[IFNAMSIZ]; struct sit_net *sitn = net_generic(net, sit_net_id); for (tp = __ipip6_bucket(sitn, parms); (t = rtnl_dereference(*tp)) != NULL; tp = &t->next) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && parms->link == t->parms.link) { if (create) return NULL; else return t; } } if (!create) goto failed; if (parms->name[0]) { if (!dev_valid_name(parms->name)) goto failed; strscpy(name, parms->name, IFNAMSIZ); } else { strcpy(name, "sit%d"); } dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ipip6_tunnel_setup); if (!dev) return NULL; dev_net_set(dev, net); nt = netdev_priv(dev); nt->net = net; nt->parms = *parms; if (ipip6_tunnel_create(dev) < 0) goto failed_free; if (!parms->name[0]) strcpy(parms->name, dev->name); return nt; failed_free: free_netdev(dev); failed: return NULL; } #define for_each_prl_rcu(start) \ for (prl = rcu_dereference(start); \ prl; \ prl = rcu_dereference(prl->next)) static struct ip_tunnel_prl_entry * __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) { struct ip_tunnel_prl_entry *prl; for_each_prl_rcu(t->prl) if (prl->addr == addr) break; return prl; } static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __user *a) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; unsigned int cmax, c = 0, ca, len; int ret = 0; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) return -EINVAL; if (copy_from_user(&kprl, a, sizeof(kprl))) return -EFAULT; cmax = kprl.datalen / sizeof(kprl); if (cmax > 1 && kprl.addr != htonl(INADDR_ANY)) cmax = 1; /* For simple GET or for root users, * we try harder to allocate. */ kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ? kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) : NULL; ca = min(t->prl_count, cmax); if (!kp) { /* We don't try hard to allocate much memory for * non-root users. * For root users, retry allocating enough memory for * the answer. */ kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT | __GFP_NOWARN); if (!kp) { ret = -ENOMEM; goto out; } } rcu_read_lock(); for_each_prl_rcu(t->prl) { if (c >= cmax) break; if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr) continue; kp[c].addr = prl->addr; kp[c].flags = prl->flags; c++; if (kprl.addr != htonl(INADDR_ANY)) break; } rcu_read_unlock(); len = sizeof(*kp) * c; ret = 0; if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen)) ret = -EFAULT; kfree(kp); out: return ret; } static int ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) { struct ip_tunnel_prl_entry *p; int err = 0; if (a->addr == htonl(INADDR_ANY)) return -EINVAL; ASSERT_RTNL(); for (p = rtnl_dereference(t->prl); p; p = rtnl_dereference(p->next)) { if (p->addr == a->addr) { if (chg) { p->flags = a->flags; goto out; } err = -EEXIST; goto out; } } if (chg) { err = -ENXIO; goto out; } p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL); if (!p) { err = -ENOBUFS; goto out; } p->next = t->prl; p->addr = a->addr; p->flags = a->flags; t->prl_count++; rcu_assign_pointer(t->prl, p); out: return err; } static void prl_list_destroy_rcu(struct rcu_head *head) { struct ip_tunnel_prl_entry *p, *n; p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); do { n = rcu_dereference_protected(p->next, 1); kfree(p); p = n; } while (p); } static int ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) { struct ip_tunnel_prl_entry *x; struct ip_tunnel_prl_entry __rcu **p; int err = 0; ASSERT_RTNL(); if (a && a->addr != htonl(INADDR_ANY)) { for (p = &t->prl; (x = rtnl_dereference(*p)) != NULL; p = &x->next) { if (x->addr == a->addr) { *p = x->next; kfree_rcu(x, rcu_head); t->prl_count--; goto out; } } err = -ENXIO; } else { x = rtnl_dereference(t->prl); if (x) { t->prl_count = 0; call_rcu(&x->rcu_head, prl_list_destroy_rcu); t->prl = NULL; } } out: return err; } static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ip_tunnel_prl __user *data, int cmd) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl prl; int err; if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) return -EINVAL; if (copy_from_user(&prl, data, sizeof(prl))) return -EFAULT; switch (cmd) { case SIOCDELPRL: err = ipip6_tunnel_del_prl(t, &prl); break; case SIOCADDPRL: case SIOCCHGPRL: err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); break; } dst_cache_reset(&t->dst_cache); netdev_state_change(dev); return err; } static int isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t) { struct ip_tunnel_prl_entry *p; int ok = 1; rcu_read_lock(); p = __ipip6_tunnel_locate_prl(t, iph->saddr); if (p) { if (p->flags & PRL_DEFAULT) skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT; else skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT; } else { const struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr; if (ipv6_addr_is_isatap(addr6) && (addr6->s6_addr32[3] == iph->saddr) && ipv6_chk_prefix(addr6, t->dev)) skb->ndisc_nodetype = NDISC_NODETYPE_HOST; else ok = 0; } rcu_read_unlock(); return ok; } static void ipip6_tunnel_uninit(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct sit_net *sitn = net_generic(tunnel->net, sit_net_id); if (dev == sitn->fb_tunnel_dev) { RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL); } else { ipip6_tunnel_unlink(sitn, tunnel); ipip6_tunnel_del_prl(tunnel, NULL); } dst_cache_reset(&tunnel->dst_cache); netdev_put(dev, &tunnel->dev_tracker); } static int ipip6_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (const struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; unsigned int data_len = 0; struct ip_tunnel *t; int sifindex; int err; switch (type) { default: case ICMP_PARAMETERPROB: return 0; case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: /* Impossible event. */ return 0; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, I believe they are just ether pollution. --ANK */ break; } break; case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) return 0; data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ break; case ICMP_REDIRECT: break; } err = -ENOENT; sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0; t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->daddr, iph->saddr, sifindex); if (!t) goto out; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, t->parms.link, iph->protocol); err = 0; goto out; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, iph->protocol); err = 0; goto out; } err = 0; if (__in6_dev_get(skb->dev) && !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) goto out; if (t->parms.iph.daddr == 0) goto out; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; t->err_time = jiffies; out: return err; } static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, const struct in6_addr *v6addr) { __be32 v4embed = 0; if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed) return true; return false; } /* Checks if an address matches an address on the tunnel interface. * Used to detect the NAT of proto 41 packets and let them pass spoofing test. * Long story: * This function is called after we considered the packet as spoofed * in is_spoofed_6rd. * We may have a router that is doing NAT for proto 41 packets * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd * function will return true, dropping the packet. * But, we can still check if is spoofed against the IP * addresses associated with the interface. */ static bool only_dnatted(const struct ip_tunnel *tunnel, const struct in6_addr *v6dst) { int prefix_len; #ifdef CONFIG_IPV6_SIT_6RD prefix_len = tunnel->ip6rd.prefixlen + 32 - tunnel->ip6rd.relay_prefixlen; #else prefix_len = 48; #endif return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev); } /* Returns true if a packet is spoofed */ static bool packet_is_spoofed(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *tunnel) { const struct ipv6hdr *ipv6h; if (tunnel->dev->priv_flags & IFF_ISATAP) { if (!isatap_chksrc(skb, iph, tunnel)) return true; return false; } if (tunnel->dev->flags & IFF_POINTOPOINT) return false; ipv6h = ipv6_hdr(skb); if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) { net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n", &iph->saddr, &ipv6h->saddr, &iph->daddr, &ipv6h->daddr); return true; } if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr))) return false; if (only_dnatted(tunnel, &ipv6h->daddr)) return false; net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n", &iph->saddr, &ipv6h->saddr, &iph->daddr, &ipv6h->daddr); return true; } static int ipip6_rcv(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct ip_tunnel *tunnel; int sifindex; int err; sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0; tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr, sifindex); if (tunnel) { if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && tunnel->parms.iph.protocol != 0) goto out; skb->mac_header = skb->network_header; skb_reset_network_header(skb); IPCB(skb)->flags = 0; skb->dev = tunnel->dev; if (packet_is_spoofed(skb, iph, tunnel)) { DEV_STATS_INC(tunnel->dev, rx_errors); goto out; } if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6), !net_eq(tunnel->net, dev_net(tunnel->dev)))) goto out; /* skb can be uncloned in iptunnel_pull_header, so * old iph is no longer valid */ iph = (const struct iphdr *)skb_mac_header(skb); skb_reset_mac_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", &iph->saddr, iph->tos); if (err > 1) { DEV_STATS_INC(tunnel->dev, rx_frame_errors); DEV_STATS_INC(tunnel->dev, rx_errors); goto out; } } dev_sw_netstats_rx_add(tunnel->dev, skb->len); netif_rx(skb); return 0; } /* no tunnel matched, let upstream know, ipsec may handle it */ return 1; out: kfree_skb(skb); return 0; } static const struct tnl_ptk_info ipip_tpi = { /* no tunnel info required for ipip. */ .proto = htons(ETH_P_IP), }; #if IS_ENABLED(CONFIG_MPLS) static const struct tnl_ptk_info mplsip_tpi = { /* no tunnel info required for mplsip. */ .proto = htons(ETH_P_MPLS_UC), }; #endif static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto) { const struct iphdr *iph; struct ip_tunnel *tunnel; int sifindex; sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0; iph = ip_hdr(skb); tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr, sifindex); if (tunnel) { const struct tnl_ptk_info *tpi; if (tunnel->parms.iph.protocol != ipproto && tunnel->parms.iph.protocol != 0) goto drop; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; #if IS_ENABLED(CONFIG_MPLS) if (ipproto == IPPROTO_MPLS) tpi = &mplsip_tpi; else #endif tpi = &ipip_tpi; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; skb_reset_mac_header(skb); return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error); } return 1; drop: kfree_skb(skb); return 0; } static int ipip_rcv(struct sk_buff *skb) { return sit_tunnel_rcv(skb, IPPROTO_IPIP); } #if IS_ENABLED(CONFIG_MPLS) static int mplsip_rcv(struct sk_buff *skb) { return sit_tunnel_rcv(skb, IPPROTO_MPLS); } #endif /* * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function * stores the embedded IPv4 address in v4dst and returns true. */ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, __be32 *v4dst) { #ifdef CONFIG_IPV6_SIT_6RD if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, tunnel->ip6rd.prefixlen)) { unsigned int pbw0, pbi0; int pbi1; u32 d; pbw0 = tunnel->ip6rd.prefixlen >> 5; pbi0 = tunnel->ip6rd.prefixlen & 0x1f; d = tunnel->ip6rd.relay_prefixlen < 32 ? (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> tunnel->ip6rd.relay_prefixlen : 0; pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen; if (pbi1 > 0) d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >> (32 - pbi1); *v4dst = tunnel->ip6rd.relay_prefix | htonl(d); return true; } #else if (v6dst->s6_addr16[0] == htons(0x2002)) { /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ memcpy(v4dst, &v6dst->s6_addr16[1], 4); return true; } #endif return false; } static inline __be32 try_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst) { __be32 dst = 0; check_6rd(tunnel, v6dst, &dst); return dst; } static bool ipip6_tunnel_dst_find(struct sk_buff *skb, __be32 *dst, bool is_isatap) { const struct ipv6hdr *iph6 = ipv6_hdr(skb); struct neighbour *neigh = NULL; const struct in6_addr *addr6; bool found = false; int addr_type; if (skb_dst(skb)) neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (!neigh) { net_dbg_ratelimited("nexthop == NULL\n"); return false; } addr6 = (const struct in6_addr *)&neigh->primary_key; addr_type = ipv6_addr_type(addr6); if (is_isatap) { if ((addr_type & IPV6_ADDR_UNICAST) && ipv6_addr_is_isatap(addr6)) { *dst = addr6->s6_addr32[3]; found = true; } } else { if (addr_type == IPV6_ADDR_ANY) { addr6 = &ipv6_hdr(skb)->daddr; addr_type = ipv6_addr_type(addr6); } if ((addr_type & IPV6_ADDR_COMPATv4) != 0) { *dst = addr6->s6_addr32[3]; found = true; } } neigh_release(neigh); return found; } /* * This function assumes it is being called from dev_queue_xmit() * and that skb is filled properly by that function. */ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; const struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; struct flowi4 fl4; int mtu; u8 ttl; u8 protocol = IPPROTO_IPV6; int t_hlen = tunnel->hlen + sizeof(struct iphdr); if (tos == 1) tos = ipv6_get_dsfield(iph6); /* ISATAP (RFC4214) - must come before 6to4 */ if ((dev->priv_flags & IFF_ISATAP) && !ipip6_tunnel_dst_find(skb, &dst, true)) goto tx_error; if (!dst) dst = try_6rd(tunnel, &iph6->daddr); if (!dst && !ipip6_tunnel_dst_find(skb, &dst, false)) goto tx_error; flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark, tos & INET_DSCP_MASK, RT_SCOPE_UNIVERSE, IPPROTO_IPV6, 0, dst, tiph->saddr, 0, 0, sock_net_uid(tunnel->net, NULL)); rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr); if (!rt) { rt = ip_route_output_flow(tunnel->net, &fl4, NULL); if (IS_ERR(rt)) { DEV_STATS_INC(dev, tx_carrier_errors); goto tx_error_icmp; } dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr); } if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { ip_rt_put(rt); DEV_STATS_INC(dev, tx_carrier_errors); goto tx_error_icmp; } tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); DEV_STATS_INC(dev, collisions); goto tx_error; } if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) { ip_rt_put(rt); goto tx_error; } if (df) { mtu = dst_mtu(&rt->dst) - t_hlen; if (mtu < IPV4_MIN_MTU) { DEV_STATS_INC(dev, collisions); ip_rt_put(rt); goto tx_error; } if (mtu < IPV6_MIN_MTU) { mtu = IPV6_MIN_MTU; df = 0; } if (tunnel->parms.iph.daddr) skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; } } if (tunnel->err_count > 0) { if (time_before(jiffies, tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; dst_link_failure(skb); } else tunnel->err_count = 0; } /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = LL_RESERVED_SPACE(tdev) + t_hlen; if (skb_headroom(skb) < max_headroom || skb_shared(skb) || (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); return NETDEV_TX_OK; } if (skb->sk) skb_set_owner_w(new_skb, skb->sk); dev_kfree_skb(skb); skb = new_skb; iph6 = ipv6_hdr(skb); } ttl = tiph->ttl; if (ttl == 0) ttl = iph6->hop_limit; tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0) { ip_rt_put(rt); goto tx_error; } skb_set_inner_ipproto(skb, IPPROTO_IPV6); iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)), 0); return NETDEV_TX_OK; tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); DEV_STATS_INC(dev, tx_errors); return NETDEV_TX_OK; } static netdev_tx_t sit_tunnel_xmit__(struct sk_buff *skb, struct net_device *dev, u8 ipproto) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) goto tx_error; skb_set_inner_ipproto(skb, ipproto); ip_tunnel_xmit(skb, dev, tiph, ipproto); return NETDEV_TX_OK; tx_error: kfree_skb(skb); DEV_STATS_INC(dev, tx_errors); return NETDEV_TX_OK; } static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { if (!pskb_inet_may_pull(skb)) goto tx_err; switch (skb->protocol) { case htons(ETH_P_IP): sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP); break; case htons(ETH_P_IPV6): ipip6_tunnel_xmit(skb, dev); break; #if IS_ENABLED(CONFIG_MPLS) case htons(ETH_P_MPLS_UC): sit_tunnel_xmit__(skb, dev, IPPROTO_MPLS); break; #endif default: goto tx_err; } return NETDEV_TX_OK; tx_err: DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); return NETDEV_TX_OK; } static void ipip6_tunnel_bind_dev(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); struct net_device *tdev = NULL; int hlen = LL_MAX_HEADER; const struct iphdr *iph; struct flowi4 fl4; iph = &tunnel->parms.iph; if (iph->daddr) { struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4, NULL, iph->daddr, iph->saddr, 0, 0, IPPROTO_IPV6, iph->tos & INET_DSCP_MASK, tunnel->parms.link); if (!IS_ERR(rt)) { tdev = rt->dst.dev; ip_rt_put(rt); } dev->flags |= IFF_POINTOPOINT; } if (!tdev && tunnel->parms.link) tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev && !netif_is_l3_master(tdev)) { int mtu; mtu = tdev->mtu - t_hlen; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; WRITE_ONCE(dev->mtu, mtu); hlen = tdev->hard_header_len + tdev->needed_headroom; } dev->needed_headroom = t_hlen + hlen; } static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm_kern *p, __u32 fwmark) { struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); ipip6_tunnel_unlink(sitn, t); synchronize_net(); t->parms.iph.saddr = p->iph.saddr; t->parms.iph.daddr = p->iph.daddr; __dev_addr_set(t->dev, &p->iph.saddr, 4); memcpy(t->dev->broadcast, &p->iph.daddr, 4); ipip6_tunnel_link(sitn, t); t->parms.iph.ttl = p->iph.ttl; t->parms.iph.tos = p->iph.tos; t->parms.iph.frag_off = p->iph.frag_off; if (t->parms.link != p->link || t->fwmark != fwmark) { t->parms.link = p->link; t->fwmark = fwmark; ipip6_tunnel_bind_dev(t->dev); } dst_cache_reset(&t->dst_cache); netdev_state_change(t->dev); } #ifdef CONFIG_IPV6_SIT_6RD static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, struct ip_tunnel_6rd *ip6rd) { struct in6_addr prefix; __be32 relay_prefix; if (ip6rd->relay_prefixlen > 32 || ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64) return -EINVAL; ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen); if (!ipv6_addr_equal(&prefix, &ip6rd->prefix)) return -EINVAL; if (ip6rd->relay_prefixlen) relay_prefix = ip6rd->relay_prefix & htonl(0xffffffffUL << (32 - ip6rd->relay_prefixlen)); else relay_prefix = 0; if (relay_prefix != ip6rd->relay_prefix) return -EINVAL; t->ip6rd.prefix = prefix; t->ip6rd.relay_prefix = relay_prefix; t->ip6rd.prefixlen = ip6rd->prefixlen; t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; dst_cache_reset(&t->dst_cache); netdev_state_change(t->dev); return 0; } static int ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm_kern p; struct ip_tunnel_6rd ip6rd; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { if (!ip_tunnel_parm_from_user(&p, data)) return -EFAULT; t = ipip6_tunnel_locate(t->net, &p, 0); } if (!t) t = netdev_priv(dev); ip6rd.prefix = t->ip6rd.prefix; ip6rd.relay_prefix = t->ip6rd.relay_prefix; ip6rd.prefixlen = t->ip6rd.prefixlen; ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; if (copy_to_user(data, &ip6rd, sizeof(ip6rd))) return -EFAULT; return 0; } static int ipip6_tunnel_6rdctl(struct net_device *dev, struct ip_tunnel_6rd __user *data, int cmd) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_6rd ip6rd; int err; if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&ip6rd, data, sizeof(ip6rd))) return -EFAULT; if (cmd != SIOCDEL6RD) { err = ipip6_tunnel_update_6rd(t, &ip6rd); if (err < 0) return err; } else ipip6_tunnel_clone_6rd(dev, dev_to_sit_net(dev)); return 0; } #endif /* CONFIG_IPV6_SIT_6RD */ static bool ipip6_valid_ip_proto(u8 ipproto) { return ipproto == IPPROTO_IPV6 || ipproto == IPPROTO_IPIP || #if IS_ENABLED(CONFIG_MPLS) ipproto == IPPROTO_MPLS || #endif ipproto == 0; } static int __ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm_kern *p) { if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (!ipip6_valid_ip_proto(p->iph.protocol)) return -EINVAL; if (p->iph.version != 4 || p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF))) return -EINVAL; if (p->iph.ttl) p->iph.frag_off |= htons(IP_DF); return 0; } static int ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm_kern *p) { struct ip_tunnel *t = netdev_priv(dev); if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) t = ipip6_tunnel_locate(t->net, p, 0); if (!t) t = netdev_priv(dev); memcpy(p, &t->parms, sizeof(*p)); return 0; } static int ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm_kern *p) { struct ip_tunnel *t = netdev_priv(dev); int err; err = __ipip6_tunnel_ioctl_validate(t->net, p); if (err) return err; t = ipip6_tunnel_locate(t->net, p, 1); if (!t) return -ENOBUFS; return 0; } static int ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm_kern *p) { struct ip_tunnel *t = netdev_priv(dev); int err; err = __ipip6_tunnel_ioctl_validate(t->net, p); if (err) return err; t = ipip6_tunnel_locate(t->net, p, 0); if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { if (!t) return -ENOENT; } else { if (t) { if (t->dev != dev) return -EEXIST; } else { if (((dev->flags & IFF_POINTOPOINT) && !p->iph.daddr) || (!(dev->flags & IFF_POINTOPOINT) && p->iph.daddr)) return -EINVAL; t = netdev_priv(dev); } ipip6_tunnel_update(t, p, t->fwmark); } return 0; } static int ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm_kern *p) { struct ip_tunnel *t = netdev_priv(dev); if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { t = ipip6_tunnel_locate(t->net, p, 0); if (!t) return -ENOENT; if (t == netdev_priv(dev_to_sit_net(dev)->fb_tunnel_dev)) return -EPERM; dev = t->dev; } unregister_netdevice(dev); return 0; } static int ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd) { switch (cmd) { case SIOCGETTUNNEL: return ipip6_tunnel_get(dev, p); case SIOCADDTUNNEL: return ipip6_tunnel_add(dev, p); case SIOCCHGTUNNEL: return ipip6_tunnel_change(dev, p); case SIOCDELTUNNEL: return ipip6_tunnel_del(dev, p); default: return -EINVAL; } } static int ipip6_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { switch (cmd) { case SIOCGETTUNNEL: case SIOCADDTUNNEL: case SIOCCHGTUNNEL: case SIOCDELTUNNEL: return ip_tunnel_siocdevprivate(dev, ifr, data, cmd); case SIOCGETPRL: return ipip6_tunnel_get_prl(dev, data); case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: return ipip6_tunnel_prl_ctl(dev, data, cmd); #ifdef CONFIG_IPV6_SIT_6RD case SIOCGET6RD: return ipip6_tunnel_get6rd(dev, data); case SIOCADD6RD: case SIOCCHG6RD: case SIOCDEL6RD: return ipip6_tunnel_6rdctl(dev, data, cmd); #endif default: return -EINVAL; } } static const struct net_device_ops ipip6_netdev_ops = { .ndo_init = ipip6_tunnel_init, .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, .ndo_siocdevprivate = ipip6_tunnel_siocdevprivate, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_tunnel_ctl = ipip6_tunnel_ctl, }; static void ipip6_dev_free(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); dst_cache_destroy(&tunnel->dst_cache); } #define SIT_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ NETIF_F_GSO_SOFTWARE | \ NETIF_F_HW_CSUM) static void ipip6_tunnel_setup(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->netdev_ops = &ipip6_netdev_ops; dev->header_ops = &ip_tunnel_header_ops; dev->needs_free_netdev = true; dev->priv_destructor = ipip6_dev_free; dev->type = ARPHRD_SIT; dev->mtu = ETH_DATA_LEN - t_hlen; dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - t_hlen; dev->flags = IFF_NOARP; netif_keep_dst(dev); dev->addr_len = 4; dev->lltx = true; dev->features |= SIT_FEATURES; dev->hw_features |= SIT_FEATURES; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; } static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); int err; tunnel->dev = dev; strcpy(tunnel->parms.name, dev->name); ipip6_tunnel_bind_dev(dev); err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); if (err) return err; netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); netdev_lockdep_set_classes(dev); return 0; } static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); iph->version = 4; iph->protocol = IPPROTO_IPV6; iph->ihl = 5; iph->ttl = 64; rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); } static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { u8 proto; if (!data || !data[IFLA_IPTUN_PROTO]) return 0; proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); if (!ipip6_valid_ip_proto(proto)) return -EINVAL; return 0; } static void ipip6_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm_kern *parms, __u32 *fwmark) { memset(parms, 0, sizeof(*parms)); parms->iph.version = 4; parms->iph.protocol = IPPROTO_IPV6; parms->iph.ihl = 5; parms->iph.ttl = 64; if (!data) return; ip_tunnel_netlink_parms(data, parms); if (data[IFLA_IPTUN_FWMARK]) *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } #ifdef CONFIG_IPV6_SIT_6RD /* This function returns true when 6RD attributes are present in the nl msg */ static bool ipip6_netlink_6rd_parms(struct nlattr *data[], struct ip_tunnel_6rd *ip6rd) { bool ret = false; memset(ip6rd, 0, sizeof(*ip6rd)); if (!data) return ret; if (data[IFLA_IPTUN_6RD_PREFIX]) { ret = true; ip6rd->prefix = nla_get_in6_addr(data[IFLA_IPTUN_6RD_PREFIX]); } if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) { ret = true; ip6rd->relay_prefix = nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]); } if (data[IFLA_IPTUN_6RD_PREFIXLEN]) { ret = true; ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]); } if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) { ret = true; ip6rd->relay_prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]); } return ret; } #endif static int ipip6_newlink(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct nlattr **data = params->data; struct nlattr **tb = params->tb; struct ip_tunnel *nt; struct ip_tunnel_encap ipencap; #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; #endif struct net *net; int err; net = params->link_net ? : dev_net(dev); nt = netdev_priv(dev); nt->net = net; if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { err = ip_tunnel_encap_setup(nt, &ipencap); if (err < 0) return err; } ipip6_netlink_parms(data, &nt->parms, &nt->fwmark); if (ipip6_tunnel_locate(net, &nt->parms, 0)) return -EEXIST; err = ipip6_tunnel_create(dev); if (err < 0) return err; if (tb[IFLA_MTU]) { u32 mtu = nla_get_u32(tb[IFLA_MTU]); if (mtu >= IPV6_MIN_MTU && mtu <= IP6_MAX_MTU - dev->hard_header_len) dev->mtu = mtu; } #ifdef CONFIG_IPV6_SIT_6RD if (ipip6_netlink_6rd_parms(data, &ip6rd)) { err = ipip6_tunnel_update_6rd(nt, &ip6rd); if (err < 0) unregister_netdevice_queue(dev, NULL); } #endif return err; } static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_encap ipencap; struct ip_tunnel_parm_kern p; struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; #endif __u32 fwmark = t->fwmark; int err; if (dev == sitn->fb_tunnel_dev) return -EINVAL; if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; } ipip6_netlink_parms(data, &p, &fwmark); if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) || (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) return -EINVAL; t = ipip6_tunnel_locate(net, &p, 0); if (t) { if (t->dev != dev) return -EEXIST; } else t = netdev_priv(dev); ipip6_tunnel_update(t, &p, fwmark); #ifdef CONFIG_IPV6_SIT_6RD if (ipip6_netlink_6rd_parms(data, &ip6rd)) return ipip6_tunnel_update_6rd(t, &ip6rd); #endif return 0; } static size_t ipip6_get_size(const struct net_device *dev) { return /* IFLA_IPTUN_LINK */ nla_total_size(4) + /* IFLA_IPTUN_LOCAL */ nla_total_size(4) + /* IFLA_IPTUN_REMOTE */ nla_total_size(4) + /* IFLA_IPTUN_TTL */ nla_total_size(1) + /* IFLA_IPTUN_TOS */ nla_total_size(1) + /* IFLA_IPTUN_PMTUDISC */ nla_total_size(1) + /* IFLA_IPTUN_FLAGS */ nla_total_size(2) + /* IFLA_IPTUN_PROTO */ nla_total_size(1) + #ifdef CONFIG_IPV6_SIT_6RD /* IFLA_IPTUN_6RD_PREFIX */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_IPTUN_6RD_RELAY_PREFIX */ nla_total_size(4) + /* IFLA_IPTUN_6RD_PREFIXLEN */ nla_total_size(2) + /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */ nla_total_size(2) + #endif /* IFLA_IPTUN_ENCAP_TYPE */ nla_total_size(2) + /* IFLA_IPTUN_ENCAP_FLAGS */ nla_total_size(2) + /* IFLA_IPTUN_ENCAP_SPORT */ nla_total_size(2) + /* IFLA_IPTUN_ENCAP_DPORT */ nla_total_size(2) + /* IFLA_IPTUN_FWMARK */ nla_total_size(4) + 0; } static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_parm_kern *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, !!(parm->iph.frag_off & htons(IP_DF))) || nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) || nla_put_be16(skb, IFLA_IPTUN_FLAGS, ip_tunnel_flags_to_be16(parm->i_flags)) || nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark)) goto nla_put_failure; #ifdef CONFIG_IPV6_SIT_6RD if (nla_put_in6_addr(skb, IFLA_IPTUN_6RD_PREFIX, &tunnel->ip6rd.prefix) || nla_put_in_addr(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, tunnel->ip6rd.relay_prefix) || nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN, tunnel->ip6rd.prefixlen) || nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN, tunnel->ip6rd.relay_prefixlen)) goto nla_put_failure; #endif if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) || nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 }, [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 }, [IFLA_IPTUN_TTL] = { .type = NLA_U8 }, [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, #ifdef CONFIG_IPV6_SIT_6RD [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) }, [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 }, [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 }, [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 }, #endif [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 }, }; static void ipip6_dellink(struct net_device *dev, struct list_head *head) { struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); if (dev != sitn->fb_tunnel_dev) unregister_netdevice_queue(dev, head); } static struct rtnl_link_ops sit_link_ops __read_mostly = { .kind = "sit", .maxtype = IFLA_IPTUN_MAX, .policy = ipip6_policy, .priv_size = sizeof(struct ip_tunnel), .setup = ipip6_tunnel_setup, .validate = ipip6_validate, .newlink = ipip6_newlink, .changelink = ipip6_changelink, .get_size = ipip6_get_size, .fill_info = ipip6_fill_info, .dellink = ipip6_dellink, .get_link_net = ip_tunnel_get_link_net, }; static struct xfrm_tunnel sit_handler __read_mostly = { .handler = ipip6_rcv, .err_handler = ipip6_err, .priority = 1, }; static struct xfrm_tunnel ipip_handler __read_mostly = { .handler = ipip_rcv, .err_handler = ipip6_err, .priority = 2, }; #if IS_ENABLED(CONFIG_MPLS) static struct xfrm_tunnel mplsip_handler __read_mostly = { .handler = mplsip_rcv, .err_handler = ipip6_err, .priority = 2, }; #endif static void __net_exit sit_exit_rtnl_net(struct net *net, struct list_head *head) { struct sit_net *sitn = net_generic(net, sit_net_id); struct net_device *dev, *aux; int prio; for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &sit_link_ops) unregister_netdevice_queue(dev, head); for (prio = 0; prio < 4; prio++) { int h; for (h = 0; h < (prio ? IP6_SIT_HASH_SIZE : 1); h++) { struct ip_tunnel *t; t = rtnl_net_dereference(net, sitn->tunnels[prio][h]); while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ if (!net_eq(dev_net(t->dev), net)) unregister_netdevice_queue(t->dev, head); t = rtnl_net_dereference(net, t->next); } } } } static int __net_init sit_init_net(struct net *net) { struct sit_net *sitn = net_generic(net, sit_net_id); struct ip_tunnel *t; int err; sitn->tunnels[0] = sitn->tunnels_wc; sitn->tunnels[1] = sitn->tunnels_l; sitn->tunnels[2] = sitn->tunnels_r; sitn->tunnels[3] = sitn->tunnels_r_l; if (!net_has_fallback_tunnels(net)) return 0; sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", NET_NAME_UNKNOWN, ipip6_tunnel_setup); if (!sitn->fb_tunnel_dev) { err = -ENOMEM; goto err_alloc_dev; } dev_net_set(sitn->fb_tunnel_dev, net); sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops; /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ sitn->fb_tunnel_dev->netns_immutable = true; t = netdev_priv(sitn->fb_tunnel_dev); t->net = net; err = register_netdev(sitn->fb_tunnel_dev); if (err) goto err_reg_dev; ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); strcpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: return err; } static struct pernet_operations sit_net_ops = { .init = sit_init_net, .exit_rtnl = sit_exit_rtnl_net, .id = &sit_net_id, .size = sizeof(struct sit_net), }; static void __exit sit_cleanup(void) { rtnl_link_unregister(&sit_link_ops); xfrm4_tunnel_deregister(&sit_handler, AF_INET6); xfrm4_tunnel_deregister(&ipip_handler, AF_INET); #if IS_ENABLED(CONFIG_MPLS) xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS); #endif unregister_pernet_device(&sit_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ } static int __init sit_init(void) { int err; pr_info("IPv6, IPv4 and MPLS over IPv4 tunneling driver\n"); err = register_pernet_device(&sit_net_ops); if (err < 0) return err; err = xfrm4_tunnel_register(&sit_handler, AF_INET6); if (err < 0) { pr_info("%s: can't register ip6ip4\n", __func__); goto xfrm_tunnel_failed; } err = xfrm4_tunnel_register(&ipip_handler, AF_INET); if (err < 0) { pr_info("%s: can't register ip4ip4\n", __func__); goto xfrm_tunnel4_failed; } #if IS_ENABLED(CONFIG_MPLS) err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS); if (err < 0) { pr_info("%s: can't register mplsip\n", __func__); goto xfrm_tunnel_mpls_failed; } #endif err = rtnl_link_register(&sit_link_ops); if (err < 0) goto rtnl_link_failed; out: return err; rtnl_link_failed: #if IS_ENABLED(CONFIG_MPLS) xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS); xfrm_tunnel_mpls_failed: #endif xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm_tunnel4_failed: xfrm4_tunnel_deregister(&sit_handler, AF_INET6); xfrm_tunnel_failed: unregister_pernet_device(&sit_net_ops); goto out; } module_init(sit_init); module_exit(sit_cleanup); MODULE_DESCRIPTION("IPv6-in-IPv4 tunnel SIT driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("sit"); MODULE_ALIAS_NETDEV("sit0");
44 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 /* SPDX-License-Identifier: GPL-2.0-only */ /* * V9FS definitions. * * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #ifndef FS_9P_V9FS_H #define FS_9P_V9FS_H #include <linux/backing-dev.h> #include <linux/netfs.h> /** * enum p9_session_flags - option flags for each 9P session * @V9FS_PROTO_2000U: whether or not to use 9P2000.u extensions * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) * @V9FS_ACCESS_CLIENT: Just like user, but access check is performed on client. * @V9FS_ACCESS_ANY: use a single attach for all users * @V9FS_ACCESS_MASK: bit mask of different ACCESS options * @V9FS_POSIX_ACL: POSIX ACLs are enforced * * Session flags reflect options selected by users at mount time */ #define V9FS_ACCESS_ANY (V9FS_ACCESS_SINGLE | \ V9FS_ACCESS_USER | \ V9FS_ACCESS_CLIENT) #define V9FS_ACCESS_MASK V9FS_ACCESS_ANY #define V9FS_ACL_MASK V9FS_POSIX_ACL enum p9_session_flags { V9FS_PROTO_2000U = 0x01, V9FS_PROTO_2000L = 0x02, V9FS_ACCESS_SINGLE = 0x04, V9FS_ACCESS_USER = 0x08, V9FS_ACCESS_CLIENT = 0x10, V9FS_POSIX_ACL = 0x20, V9FS_NO_XATTR = 0x40, V9FS_IGNORE_QV = 0x80, /* ignore qid.version for cache hints */ V9FS_DIRECT_IO = 0x100, V9FS_SYNC = 0x200 }; /** * enum p9_cache_shortcuts - human readable cache preferences * @CACHE_SC_NONE: disable all caches * @CACHE_SC_READAHEAD: only provide caching for readahead * @CACHE_SC_MMAP: provide caching to enable mmap * @CACHE_SC_LOOSE: non-coherent caching for files and meta data * @CACHE_SC_FSCACHE: persistent non-coherent caching for files and meta-data * */ enum p9_cache_shortcuts { CACHE_SC_NONE = 0b00000000, CACHE_SC_READAHEAD = 0b00000001, CACHE_SC_MMAP = 0b00000101, CACHE_SC_LOOSE = 0b00001111, CACHE_SC_FSCACHE = 0b10001111, }; /** * enum p9_cache_bits - possible values of ->cache * @CACHE_NONE: caches disabled * @CACHE_FILE: file caching (open to close) * @CACHE_META: meta-data and directory caching * @CACHE_WRITEBACK: write-back caching for files * @CACHE_LOOSE: don't check cache consistency * @CACHE_FSCACHE: local persistent caches * */ enum p9_cache_bits { CACHE_NONE = 0b00000000, CACHE_FILE = 0b00000001, CACHE_META = 0b00000010, CACHE_WRITEBACK = 0b00000100, CACHE_LOOSE = 0b00001000, CACHE_FSCACHE = 0b10000000, }; /** * struct v9fs_session_info - per-instance session information * @flags: session options of type &p9_session_flags * @nodev: set to 1 to disable device mapping * @debug: debug level * @afid: authentication handle * @cache: cache mode of type &p9_cache_bits * @cachetag: the tag of the cache associated with this session * @fscache: session cookie associated with FS-Cache * @uname: string user name to mount hierarchy as * @aname: mount specifier for remote hierarchy * @maxdata: maximum data to be sent/recvd per protocol message * @dfltuid: default numeric userid to mount hierarchy as * @dfltgid: default numeric groupid to mount hierarchy as * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy * @clnt: reference to 9P network client instantiated for this session * @slist: reference to list of registered 9p sessions * * This structure holds state for each session instance established during * a sys_mount() . * * Bugs: there seems to be a lot of state which could be condensed and/or * removed. */ struct v9fs_session_info { /* options */ unsigned int flags; unsigned char nodev; unsigned short debug; unsigned int afid; unsigned int cache; #ifdef CONFIG_9P_FSCACHE char *cachetag; struct fscache_volume *fscache; #endif char *uname; /* user name to mount as */ char *aname; /* name of remote hierarchy being mounted */ unsigned int maxdata; /* max data for client interface */ kuid_t dfltuid; /* default uid/muid for legacy support */ kgid_t dfltgid; /* default gid for legacy support */ kuid_t uid; /* if ACCESS_SINGLE, the uid that has access */ struct p9_client *clnt; /* 9p client */ struct list_head slist; /* list of sessions registered with v9fs */ struct rw_semaphore rename_sem; long session_lock_timeout; /* retry interval for blocking locks */ }; /* cache_validity flags */ #define V9FS_INO_INVALID_ATTR 0x01 struct v9fs_inode { struct netfs_inode netfs; /* Netfslib context and vfs inode */ struct p9_qid qid; unsigned int cache_validity; struct mutex v_mutex; }; static inline struct v9fs_inode *V9FS_I(const struct inode *inode) { return container_of(inode, struct v9fs_inode, netfs.inode); } static inline struct fscache_cookie *v9fs_inode_cookie(struct v9fs_inode *v9inode) { #ifdef CONFIG_9P_FSCACHE return netfs_i_cookie(&v9inode->netfs); #else return NULL; #endif } static inline struct fscache_volume *v9fs_session_cache(struct v9fs_session_info *v9ses) { #ifdef CONFIG_9P_FSCACHE return v9ses->fscache; #else return NULL; #endif } extern int v9fs_show_options(struct seq_file *m, struct dentry *root); struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, const char *dev_name, char *data); extern void v9fs_session_close(struct v9fs_session_info *v9ses); extern void v9fs_session_cancel(struct v9fs_session_info *v9ses); extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d); extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d); extern int v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb, int new); extern const struct inode_operations v9fs_dir_inode_operations_dotl; extern const struct inode_operations v9fs_file_inode_operations_dotl; extern const struct inode_operations v9fs_symlink_inode_operations_dotl; extern const struct netfs_request_ops v9fs_req_ops; extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb, int new); /* other default globals */ #define V9FS_PORT 564 #define V9FS_DEFUSER "nobody" #define V9FS_DEFANAME "" #define V9FS_DEFUID KUIDT_INIT(-2) #define V9FS_DEFGID KGIDT_INIT(-2) static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) { return inode->i_sb->s_fs_info; } static inline struct v9fs_session_info *v9fs_dentry2v9ses(const struct dentry *dentry) { return dentry->d_sb->s_fs_info; } static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses) { return v9ses->flags & V9FS_PROTO_2000U; } static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses) { return v9ses->flags & V9FS_PROTO_2000L; } /** * v9fs_get_inode_from_fid - Helper routine to populate an inode by * issuing a attribute request * @v9ses: session information * @fid: fid to issue attribute request for * @sb: superblock on which to create inode * */ static inline struct inode * v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) { if (v9fs_proto_dotl(v9ses)) return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 0); else return v9fs_inode_from_fid(v9ses, fid, sb, 0); } /** * v9fs_get_new_inode_from_fid - Helper routine to populate an inode by * issuing a attribute request * @v9ses: session information * @fid: fid to issue attribute request for * @sb: superblock on which to create inode * */ static inline struct inode * v9fs_get_new_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) { if (v9fs_proto_dotl(v9ses)) return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 1); else return v9fs_inode_from_fid(v9ses, fid, sb, 1); } #endif
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright 2011-2014 Autronica Fire and Security AS * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * * include file for HSR and PRP. */ #ifndef __HSR_PRIVATE_H #define __HSR_PRIVATE_H #include <linux/netdevice.h> #include <linux/list.h> #include <linux/if_vlan.h> #include <linux/if_hsr.h> /* Time constants as specified in the HSR specification (IEC-62439-3 2010) * Table 8. * All values in milliseconds. */ #define HSR_LIFE_CHECK_INTERVAL 2000 /* ms */ #define HSR_NODE_FORGET_TIME 60000 /* ms */ #define HSR_PROXY_NODE_FORGET_TIME 60000 /* ms */ #define HSR_ANNOUNCE_INTERVAL 100 /* ms */ #define HSR_ENTRY_FORGET_TIME 400 /* ms */ /* By how much may slave1 and slave2 timestamps of latest received frame from * each node differ before we notify of communication problem? */ #define MAX_SLAVE_DIFF 3000 /* ms */ #define HSR_SEQNR_START (USHRT_MAX - 1024) #define HSR_SUP_SEQNR_START (HSR_SEQNR_START / 2) /* How often shall we check for broken ring and remove node entries older than * HSR_NODE_FORGET_TIME? */ #define PRUNE_PERIOD 3000 /* ms */ #define PRUNE_PROXY_PERIOD 3000 /* ms */ #define HSR_TLV_EOT 0 /* End of TLVs */ #define HSR_TLV_ANNOUNCE 22 #define HSR_TLV_LIFE_CHECK 23 /* PRP V1 life check for Duplicate discard */ #define PRP_TLV_LIFE_CHECK_DD 20 /* PRP V1 life check for Duplicate Accept */ #define PRP_TLV_LIFE_CHECK_DA 21 /* PRP V1 life redundancy box MAC address */ #define PRP_TLV_REDBOX_MAC 30 #define HSR_V1_SUP_LSDUSIZE 52 /* The helper functions below assumes that 'path' occupies the 4 most * significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or * equivalently, the 4 most significant bits of HSR tag byte 14). * * This is unclear in the IEC specification; its definition of MAC addresses * indicates the spec is written with the least significant bit first (to the * left). This, however, would mean that the LSDU field would be split in two * with the path field in-between, which seems strange. I'm guessing the MAC * address definition is in error. */ static inline void set_hsr_tag_path(struct hsr_tag *ht, u16 path) { ht->path_and_LSDU_size = htons((ntohs(ht->path_and_LSDU_size) & 0x0FFF) | (path << 12)); } static inline void set_hsr_tag_LSDU_size(struct hsr_tag *ht, u16 LSDU_size) { ht->path_and_LSDU_size = htons((ntohs(ht->path_and_LSDU_size) & 0xF000) | (LSDU_size & 0x0FFF)); } struct hsr_ethhdr { struct ethhdr ethhdr; struct hsr_tag hsr_tag; } __packed; struct hsr_vlan_ethhdr { struct vlan_ethhdr vlanhdr; struct hsr_tag hsr_tag; } __packed; struct hsr_sup_tlv { u8 HSR_TLV_type; u8 HSR_TLV_length; } __packed; /* HSR/PRP Supervision Frame data types. * Field names as defined in the IEC:2010 standard for HSR. */ struct hsr_sup_tag { __be16 path_and_HSR_ver; __be16 sequence_nr; struct hsr_sup_tlv tlv; } __packed; struct hsr_sup_payload { unsigned char macaddress_A[ETH_ALEN]; } __packed; static inline void set_hsr_stag_path(struct hsr_sup_tag *hst, u16 path) { set_hsr_tag_path((struct hsr_tag *)hst, path); } static inline void set_hsr_stag_HSR_ver(struct hsr_sup_tag *hst, u16 HSR_ver) { set_hsr_tag_LSDU_size((struct hsr_tag *)hst, HSR_ver); } struct hsrv0_ethhdr_sp { struct ethhdr ethhdr; struct hsr_sup_tag hsr_sup; } __packed; struct hsrv1_ethhdr_sp { struct ethhdr ethhdr; struct hsr_tag hsr; struct hsr_sup_tag hsr_sup; } __packed; /* PRP Redunancy Control Trailor (RCT). * As defined in IEC-62439-4:2012, the PRP RCT is really { sequence Nr, * Lan indentifier (LanId), LSDU_size and PRP_suffix = 0x88FB }. * * Field names as defined in the IEC:2012 standard for PRP. */ struct prp_rct { __be16 sequence_nr; __be16 lan_id_and_LSDU_size; __be16 PRP_suffix; } __packed; static inline u16 get_prp_LSDU_size(struct prp_rct *rct) { return ntohs(rct->lan_id_and_LSDU_size) & 0x0FFF; } static inline void set_prp_lan_id(struct prp_rct *rct, u16 lan_id) { rct->lan_id_and_LSDU_size = htons((ntohs(rct->lan_id_and_LSDU_size) & 0x0FFF) | (lan_id << 12)); } static inline void set_prp_LSDU_size(struct prp_rct *rct, u16 LSDU_size) { rct->lan_id_and_LSDU_size = htons((ntohs(rct->lan_id_and_LSDU_size) & 0xF000) | (LSDU_size & 0x0FFF)); } struct hsr_port { struct list_head port_list; struct net_device *dev; struct hsr_priv *hsr; enum hsr_port_type type; struct rcu_head rcu; unsigned char original_macaddress[ETH_ALEN]; }; struct hsr_frame_info; struct hsr_node; struct hsr_proto_ops { /* format and send supervision frame */ void (*send_sv_frame)(struct hsr_port *port, unsigned long *interval, const unsigned char addr[ETH_ALEN]); void (*handle_san_frame)(bool san, enum hsr_port_type port, struct hsr_node *node); bool (*drop_frame)(struct hsr_frame_info *frame, struct hsr_port *port); struct sk_buff * (*get_untagged_frame)(struct hsr_frame_info *frame, struct hsr_port *port); struct sk_buff * (*create_tagged_frame)(struct hsr_frame_info *frame, struct hsr_port *port); int (*fill_frame_info)(__be16 proto, struct sk_buff *skb, struct hsr_frame_info *frame); bool (*invalid_dan_ingress_frame)(__be16 protocol); void (*update_san_info)(struct hsr_node *node, bool is_sup); int (*register_frame_out)(struct hsr_port *port, struct hsr_frame_info *frame); }; struct hsr_self_node { unsigned char macaddress_A[ETH_ALEN]; unsigned char macaddress_B[ETH_ALEN]; struct rcu_head rcu_head; }; struct hsr_priv { struct rcu_head rcu_head; struct list_head ports; struct list_head node_db; /* Known HSR nodes */ struct list_head proxy_node_db; /* RedBox HSR proxy nodes */ struct hsr_self_node __rcu *self_node; /* MACs of slaves */ struct timer_list announce_timer; /* Supervision frame dispatch */ struct timer_list announce_proxy_timer; struct timer_list prune_timer; struct timer_list prune_proxy_timer; int announce_count; u16 sequence_nr; u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */ enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */ spinlock_t seqnr_lock; /* locking for sequence_nr */ spinlock_t list_lock; /* locking for node list */ struct hsr_proto_ops *proto_ops; #define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. Bit 0 is set * based on SLAVE_A or SLAVE_B */ u8 net_id; /* for PRP, it occupies most significant 3 bits * of lan_id */ bool fwd_offloaded; /* Forwarding offloaded to HW */ bool redbox; /* Device supports HSR RedBox */ unsigned char macaddress_redbox[ETH_ALEN]; unsigned char sup_multicast_addr[ETH_ALEN] __aligned(sizeof(u16)); /* Align to u16 boundary to avoid unaligned access * in ether_addr_equal */ #ifdef CONFIG_DEBUG_FS struct dentry *node_tbl_root; #endif }; #define hsr_for_each_port(hsr, port) \ list_for_each_entry_rcu((port), &(hsr)->ports, port_list) #define hsr_for_each_port_rtnl(hsr, port) \ list_for_each_entry_rcu((port), &(hsr)->ports, port_list, lockdep_rtnl_is_held()) struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt); /* Caller must ensure skb is a valid HSR frame */ static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb) { struct hsr_ethhdr *hsr_ethhdr; hsr_ethhdr = (struct hsr_ethhdr *)skb_mac_header(skb); return ntohs(hsr_ethhdr->hsr_tag.sequence_nr); } static inline struct prp_rct *skb_get_PRP_rct(struct sk_buff *skb) { unsigned char *tail = skb_tail_pointer(skb) - HSR_HLEN; struct prp_rct *rct = (struct prp_rct *)tail; if (rct->PRP_suffix == htons(ETH_P_PRP)) return rct; return NULL; } /* Assume caller has confirmed this skb is PRP suffixed */ static inline u16 prp_get_skb_sequence_nr(struct prp_rct *rct) { return ntohs(rct->sequence_nr); } /* assume there is a valid rct */ static inline bool prp_check_lsdu_size(struct sk_buff *skb, struct prp_rct *rct, bool is_sup) { struct ethhdr *ethhdr; int expected_lsdu_size; if (is_sup) { expected_lsdu_size = HSR_V1_SUP_LSDUSIZE; } else { ethhdr = (struct ethhdr *)skb_mac_header(skb); expected_lsdu_size = skb->len - 14; if (ethhdr->h_proto == htons(ETH_P_8021Q)) expected_lsdu_size -= 4; } return (expected_lsdu_size == get_prp_LSDU_size(rct)); } #if IS_ENABLED(CONFIG_DEBUG_FS) void hsr_debugfs_rename(struct net_device *dev); void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev); void hsr_debugfs_term(struct hsr_priv *priv); void hsr_debugfs_create_root(void); void hsr_debugfs_remove_root(void); #else static inline void hsr_debugfs_rename(struct net_device *dev) { } static inline void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev) {} static inline void hsr_debugfs_term(struct hsr_priv *priv) {} static inline void hsr_debugfs_create_root(void) {} static inline void hsr_debugfs_remove_root(void) {} #endif #endif /* __HSR_PRIVATE_H */
26 7 7 7 7 7 165 165 1 154 138 138 154 134 133 134 101 101 64 48 27 79 79 79 78 26 43 27 26 43 43 15 15 34 28 3 4 8 1 1 1 1 1 1 1 1 5 1 1 3 5 4 1 1 1 1 25 25 25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 /* * * Copyright IBM Corporation, 2012 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> * * Cgroup v2 * Copyright (C) 2019 Red Hat, Inc. * Author: Giuseppe Scrivano <gscrivan@redhat.com> * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2.1 of the GNU Lesser General Public License * as published by the Free Software Foundation. * * This program is distributed in the hope that it would be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * */ #include <linux/cgroup.h> #include <linux/page_counter.h> #include <linux/slab.h> #include <linux/hugetlb.h> #include <linux/hugetlb_cgroup.h> #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) #define MEMFILE_IDX(val) (((val) >> 16) & 0xffff) #define MEMFILE_ATTR(val) ((val) & 0xffff) /* Use t->m[0] to encode the offset */ #define MEMFILE_OFFSET(t, m0) (((offsetof(t, m0) << 16) | sizeof_field(t, m0))) #define MEMFILE_OFFSET0(val) (((val) >> 16) & 0xffff) #define MEMFILE_FIELD_SIZE(val) ((val) & 0xffff) #define DFL_TMPL_SIZE ARRAY_SIZE(hugetlb_dfl_tmpl) #define LEGACY_TMPL_SIZE ARRAY_SIZE(hugetlb_legacy_tmpl) static struct hugetlb_cgroup *root_h_cgroup __read_mostly; static struct cftype *dfl_files; static struct cftype *legacy_files; static inline struct page_counter * __hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx, bool rsvd) { if (rsvd) return &h_cg->rsvd_hugepage[idx]; return &h_cg->hugepage[idx]; } static inline struct page_counter * hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx) { return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false); } static inline struct page_counter * hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx) { return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true); } static inline struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s) { return s ? container_of(s, struct hugetlb_cgroup, css) : NULL; } static inline struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task) { return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id)); } static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg) { return (h_cg == root_h_cgroup); } static inline struct hugetlb_cgroup * parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg) { return hugetlb_cgroup_from_css(h_cg->css.parent); } static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) { struct hstate *h; for_each_hstate(h) { if (page_counter_read( hugetlb_cgroup_counter_from_cgroup(h_cg, hstate_index(h)))) return true; } return false; } static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup, struct hugetlb_cgroup *parent_h_cgroup) { int idx; for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) { struct page_counter *fault, *fault_parent = NULL; struct page_counter *rsvd, *rsvd_parent = NULL; unsigned long limit; if (parent_h_cgroup) { fault_parent = hugetlb_cgroup_counter_from_cgroup( parent_h_cgroup, idx); rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd( parent_h_cgroup, idx); } fault = hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx); rsvd = hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx); page_counter_init(fault, fault_parent, false); page_counter_init(rsvd, rsvd_parent, false); if (!cgroup_subsys_on_dfl(hugetlb_cgrp_subsys)) { fault->track_failcnt = true; rsvd->track_failcnt = true; } limit = round_down(PAGE_COUNTER_MAX, pages_per_huge_page(&hstates[idx])); VM_BUG_ON(page_counter_set_max(fault, limit)); VM_BUG_ON(page_counter_set_max(rsvd, limit)); } } static void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup) { int node; for_each_node(node) kfree(h_cgroup->nodeinfo[node]); kfree(h_cgroup); } static struct cgroup_subsys_state * hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) { struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css); struct hugetlb_cgroup *h_cgroup; int node; h_cgroup = kzalloc(struct_size(h_cgroup, nodeinfo, nr_node_ids), GFP_KERNEL); if (!h_cgroup) return ERR_PTR(-ENOMEM); if (!parent_h_cgroup) root_h_cgroup = h_cgroup; /* * TODO: this routine can waste much memory for nodes which will * never be onlined. It's better to use memory hotplug callback * function. */ for_each_node(node) { /* Set node_to_alloc to NUMA_NO_NODE for offline nodes. */ int node_to_alloc = node_state(node, N_NORMAL_MEMORY) ? node : NUMA_NO_NODE; h_cgroup->nodeinfo[node] = kzalloc_node(sizeof(struct hugetlb_cgroup_per_node), GFP_KERNEL, node_to_alloc); if (!h_cgroup->nodeinfo[node]) goto fail_alloc_nodeinfo; } hugetlb_cgroup_init(h_cgroup, parent_h_cgroup); return &h_cgroup->css; fail_alloc_nodeinfo: hugetlb_cgroup_free(h_cgroup); return ERR_PTR(-ENOMEM); } static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css) { hugetlb_cgroup_free(hugetlb_cgroup_from_css(css)); } /* * Should be called with hugetlb_lock held. * Since we are holding hugetlb_lock, pages cannot get moved from * active list or uncharged from the cgroup, So no need to get * page reference and test for page active here. This function * cannot fail. */ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, struct folio *folio) { unsigned int nr_pages; struct page_counter *counter; struct hugetlb_cgroup *hcg; struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg); hcg = hugetlb_cgroup_from_folio(folio); /* * We can have pages in active list without any cgroup * ie, hugepage with less than 3 pages. We can safely * ignore those pages. */ if (!hcg || hcg != h_cg) goto out; nr_pages = folio_nr_pages(folio); if (!parent) { parent = root_h_cgroup; /* root has no limit */ page_counter_charge(&parent->hugepage[idx], nr_pages); } counter = &h_cg->hugepage[idx]; /* Take the pages off the local counter */ page_counter_cancel(counter, nr_pages); set_hugetlb_cgroup(folio, parent); out: return; } /* * Force the hugetlb cgroup to empty the hugetlb resources by moving them to * the parent cgroup. */ static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css) { struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); struct hstate *h; struct folio *folio; do { for_each_hstate(h) { spin_lock_irq(&hugetlb_lock); list_for_each_entry(folio, &h->hugepage_activelist, lru) hugetlb_cgroup_move_parent(hstate_index(h), h_cg, folio); spin_unlock_irq(&hugetlb_lock); } cond_resched(); } while (hugetlb_cgroup_have_usage(h_cg)); } static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx, enum hugetlb_memory_event event) { atomic_long_inc(&hugetlb->events_local[idx][event]); cgroup_file_notify(&hugetlb->events_local_file[idx]); do { atomic_long_inc(&hugetlb->events[idx][event]); cgroup_file_notify(&hugetlb->events_file[idx]); } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) && !hugetlb_cgroup_is_root(hugetlb)); } static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr, bool rsvd) { int ret = 0; struct page_counter *counter; struct hugetlb_cgroup *h_cg = NULL; if (hugetlb_cgroup_disabled()) goto done; again: rcu_read_lock(); h_cg = hugetlb_cgroup_from_task(current); if (!css_tryget(&h_cg->css)) { rcu_read_unlock(); goto again; } rcu_read_unlock(); if (!page_counter_try_charge( __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), nr_pages, &counter)) { ret = -ENOMEM; hugetlb_event(h_cg, idx, HUGETLB_MAX); css_put(&h_cg->css); goto done; } /* Reservations take a reference to the css because they do not get * reparented. */ if (!rsvd) css_put(&h_cg->css); done: *ptr = h_cg; return ret; } int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr) { return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false); } int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr) { return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true); } /* Should be called with hugetlb_lock held */ static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, struct folio *folio, bool rsvd) { if (hugetlb_cgroup_disabled() || !h_cg) return; lockdep_assert_held(&hugetlb_lock); __set_hugetlb_cgroup(folio, h_cg, rsvd); if (!rsvd) { unsigned long usage = h_cg->nodeinfo[folio_nid(folio)]->usage[idx]; /* * This write is not atomic due to fetching usage and writing * to it, but that's fine because we call this with * hugetlb_lock held anyway. */ WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx], usage + nr_pages); } } void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, struct folio *folio) { __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, false); } void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, struct folio *folio) { __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, true); } /* * Should be called with hugetlb_lock held */ static void __hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, struct folio *folio, bool rsvd) { struct hugetlb_cgroup *h_cg; if (hugetlb_cgroup_disabled()) return; lockdep_assert_held(&hugetlb_lock); h_cg = __hugetlb_cgroup_from_folio(folio, rsvd); if (unlikely(!h_cg)) return; __set_hugetlb_cgroup(folio, NULL, rsvd); page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), nr_pages); if (rsvd) css_put(&h_cg->css); else { unsigned long usage = h_cg->nodeinfo[folio_nid(folio)]->usage[idx]; /* * This write is not atomic due to fetching usage and writing * to it, but that's fine because we call this with * hugetlb_lock held anyway. */ WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx], usage - nr_pages); } } void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, struct folio *folio) { __hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, false); } void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages, struct folio *folio) { __hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, true); } static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, bool rsvd) { if (hugetlb_cgroup_disabled() || !h_cg) return; page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), nr_pages); if (rsvd) css_put(&h_cg->css); } void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg) { __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false); } void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg) { __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true); } void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start, unsigned long end) { if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter || !resv->css) return; page_counter_uncharge(resv->reservation_counter, (end - start) * resv->pages_per_hpage); css_put(resv->css); } void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, struct file_region *rg, unsigned long nr_pages, bool region_del) { if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages) return; if (rg->reservation_counter && resv->pages_per_hpage && !resv->reservation_counter) { page_counter_uncharge(rg->reservation_counter, nr_pages * resv->pages_per_hpage); /* * Only do css_put(rg->css) when we delete the entire region * because one file_region must hold exactly one css reference. */ if (region_del) css_put(rg->css); } } enum { RES_USAGE, RES_RSVD_USAGE, RES_LIMIT, RES_RSVD_LIMIT, RES_MAX_USAGE, RES_RSVD_MAX_USAGE, RES_FAILCNT, RES_RSVD_FAILCNT, }; static int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy) { int nid; struct cftype *cft = seq_cft(seq); int idx = MEMFILE_IDX(cft->private); bool legacy = !cgroup_subsys_on_dfl(hugetlb_cgrp_subsys); struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); struct cgroup_subsys_state *css; unsigned long usage; if (legacy) { /* Add up usage across all nodes for the non-hierarchical total. */ usage = 0; for_each_node_state(nid, N_MEMORY) usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]); seq_printf(seq, "total=%lu", usage * PAGE_SIZE); /* Simply print the per-node usage for the non-hierarchical total. */ for_each_node_state(nid, N_MEMORY) seq_printf(seq, " N%d=%lu", nid, READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) * PAGE_SIZE); seq_putc(seq, '\n'); } /* * The hierarchical total is pretty much the value recorded by the * counter, so use that. */ seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "", page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE); /* * For each node, transverse the css tree to obtain the hierarchical * node usage. */ for_each_node_state(nid, N_MEMORY) { usage = 0; rcu_read_lock(); css_for_each_descendant_pre(css, &h_cg->css) { usage += READ_ONCE(hugetlb_cgroup_from_css(css) ->nodeinfo[nid] ->usage[idx]); } rcu_read_unlock(); seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE); } seq_putc(seq, '\n'); return 0; } static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, struct cftype *cft) { struct page_counter *counter; struct page_counter *rsvd_counter; struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)]; rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)]; switch (MEMFILE_ATTR(cft->private)) { case RES_USAGE: return (u64)page_counter_read(counter) * PAGE_SIZE; case RES_RSVD_USAGE: return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE; case RES_LIMIT: return (u64)counter->max * PAGE_SIZE; case RES_RSVD_LIMIT: return (u64)rsvd_counter->max * PAGE_SIZE; case RES_MAX_USAGE: return (u64)counter->watermark * PAGE_SIZE; case RES_RSVD_MAX_USAGE: return (u64)rsvd_counter->watermark * PAGE_SIZE; case RES_FAILCNT: return counter->failcnt; case RES_RSVD_FAILCNT: return rsvd_counter->failcnt; default: BUG(); } } static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v) { int idx; u64 val; struct cftype *cft = seq_cft(seq); unsigned long limit; struct page_counter *counter; struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); idx = MEMFILE_IDX(cft->private); counter = &h_cg->hugepage[idx]; limit = round_down(PAGE_COUNTER_MAX, pages_per_huge_page(&hstates[idx])); switch (MEMFILE_ATTR(cft->private)) { case RES_RSVD_USAGE: counter = &h_cg->rsvd_hugepage[idx]; fallthrough; case RES_USAGE: val = (u64)page_counter_read(counter); seq_printf(seq, "%llu\n", val * PAGE_SIZE); break; case RES_RSVD_LIMIT: counter = &h_cg->rsvd_hugepage[idx]; fallthrough; case RES_LIMIT: val = (u64)counter->max; if (val == limit) seq_puts(seq, "max\n"); else seq_printf(seq, "%llu\n", val * PAGE_SIZE); break; default: BUG(); } return 0; } static DEFINE_MUTEX(hugetlb_limit_mutex); static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off, const char *max) { int ret, idx; unsigned long nr_pages; struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); bool rsvd = false; if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */ return -EINVAL; buf = strstrip(buf); ret = page_counter_memparse(buf, max, &nr_pages); if (ret) return ret; idx = MEMFILE_IDX(of_cft(of)->private); nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx])); switch (MEMFILE_ATTR(of_cft(of)->private)) { case RES_RSVD_LIMIT: rsvd = true; fallthrough; case RES_LIMIT: mutex_lock(&hugetlb_limit_mutex); ret = page_counter_set_max( __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), nr_pages); mutex_unlock(&hugetlb_limit_mutex); break; default: ret = -EINVAL; break; } return ret ?: nbytes; } static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { return hugetlb_cgroup_write(of, buf, nbytes, off, "-1"); } static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { return hugetlb_cgroup_write(of, buf, nbytes, off, "max"); } static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { int ret = 0; struct page_counter *counter, *rsvd_counter; struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)]; rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)]; switch (MEMFILE_ATTR(of_cft(of)->private)) { case RES_MAX_USAGE: page_counter_reset_watermark(counter); break; case RES_RSVD_MAX_USAGE: page_counter_reset_watermark(rsvd_counter); break; case RES_FAILCNT: counter->failcnt = 0; break; case RES_RSVD_FAILCNT: rsvd_counter->failcnt = 0; break; default: ret = -EINVAL; break; } return ret ?: nbytes; } static char *mem_fmt(char *buf, int size, unsigned long hsize) { if (hsize >= SZ_1G) snprintf(buf, size, "%luGB", hsize / SZ_1G); else if (hsize >= SZ_1M) snprintf(buf, size, "%luMB", hsize / SZ_1M); else snprintf(buf, size, "%luKB", hsize / SZ_1K); return buf; } static int __hugetlb_events_show(struct seq_file *seq, bool local) { int idx; long max; struct cftype *cft = seq_cft(seq); struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); idx = MEMFILE_IDX(cft->private); if (local) max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]); else max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]); seq_printf(seq, "max %lu\n", max); return 0; } static int hugetlb_events_show(struct seq_file *seq, void *v) { return __hugetlb_events_show(seq, false); } static int hugetlb_events_local_show(struct seq_file *seq, void *v) { return __hugetlb_events_show(seq, true); } static struct cftype hugetlb_dfl_tmpl[] = { { .name = "max", .private = RES_LIMIT, .seq_show = hugetlb_cgroup_read_u64_max, .write = hugetlb_cgroup_write_dfl, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "rsvd.max", .private = RES_RSVD_LIMIT, .seq_show = hugetlb_cgroup_read_u64_max, .write = hugetlb_cgroup_write_dfl, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "current", .private = RES_USAGE, .seq_show = hugetlb_cgroup_read_u64_max, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "rsvd.current", .private = RES_RSVD_USAGE, .seq_show = hugetlb_cgroup_read_u64_max, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "events", .seq_show = hugetlb_events_show, .file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_file[0]), .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "events.local", .seq_show = hugetlb_events_local_show, .file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_local_file[0]), .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "numa_stat", .seq_show = hugetlb_cgroup_read_numa_stat, .flags = CFTYPE_NOT_ON_ROOT, }, /* don't need terminator here */ }; static struct cftype hugetlb_legacy_tmpl[] = { { .name = "limit_in_bytes", .private = RES_LIMIT, .read_u64 = hugetlb_cgroup_read_u64, .write = hugetlb_cgroup_write_legacy, }, { .name = "rsvd.limit_in_bytes", .private = RES_RSVD_LIMIT, .read_u64 = hugetlb_cgroup_read_u64, .write = hugetlb_cgroup_write_legacy, }, { .name = "usage_in_bytes", .private = RES_USAGE, .read_u64 = hugetlb_cgroup_read_u64, }, { .name = "rsvd.usage_in_bytes", .private = RES_RSVD_USAGE, .read_u64 = hugetlb_cgroup_read_u64, }, { .name = "max_usage_in_bytes", .private = RES_MAX_USAGE, .write = hugetlb_cgroup_reset, .read_u64 = hugetlb_cgroup_read_u64, }, { .name = "rsvd.max_usage_in_bytes", .private = RES_RSVD_MAX_USAGE, .write = hugetlb_cgroup_reset, .read_u64 = hugetlb_cgroup_read_u64, }, { .name = "failcnt", .private = RES_FAILCNT, .write = hugetlb_cgroup_reset, .read_u64 = hugetlb_cgroup_read_u64, }, { .name = "rsvd.failcnt", .private = RES_RSVD_FAILCNT, .write = hugetlb_cgroup_reset, .read_u64 = hugetlb_cgroup_read_u64, }, { .name = "numa_stat", .seq_show = hugetlb_cgroup_read_numa_stat, }, /* don't need terminator here */ }; static void __init hugetlb_cgroup_cfttypes_init(struct hstate *h, struct cftype *cft, struct cftype *tmpl, int tmpl_size) { char buf[32]; int i, idx = hstate_index(h); /* format the size */ mem_fmt(buf, sizeof(buf), huge_page_size(h)); for (i = 0; i < tmpl_size; cft++, tmpl++, i++) { *cft = *tmpl; /* rebuild the name */ snprintf(cft->name, MAX_CFTYPE_NAME, "%s.%s", buf, tmpl->name); /* rebuild the private */ cft->private = MEMFILE_PRIVATE(idx, tmpl->private); /* rebuild the file_offset */ if (tmpl->file_offset) { unsigned int offset = tmpl->file_offset; cft->file_offset = MEMFILE_OFFSET0(offset) + MEMFILE_FIELD_SIZE(offset) * idx; } lockdep_register_key(&cft->lockdep_key); } } static void __init __hugetlb_cgroup_file_dfl_init(struct hstate *h) { int idx = hstate_index(h); hugetlb_cgroup_cfttypes_init(h, dfl_files + idx * DFL_TMPL_SIZE, hugetlb_dfl_tmpl, DFL_TMPL_SIZE); } static void __init __hugetlb_cgroup_file_legacy_init(struct hstate *h) { int idx = hstate_index(h); hugetlb_cgroup_cfttypes_init(h, legacy_files + idx * LEGACY_TMPL_SIZE, hugetlb_legacy_tmpl, LEGACY_TMPL_SIZE); } static void __init __hugetlb_cgroup_file_init(struct hstate *h) { __hugetlb_cgroup_file_dfl_init(h); __hugetlb_cgroup_file_legacy_init(h); } static void __init __hugetlb_cgroup_file_pre_init(void) { int cft_count; cft_count = hugetlb_max_hstate * DFL_TMPL_SIZE + 1; /* add terminator */ dfl_files = kcalloc(cft_count, sizeof(struct cftype), GFP_KERNEL); BUG_ON(!dfl_files); cft_count = hugetlb_max_hstate * LEGACY_TMPL_SIZE + 1; /* add terminator */ legacy_files = kcalloc(cft_count, sizeof(struct cftype), GFP_KERNEL); BUG_ON(!legacy_files); } static void __init __hugetlb_cgroup_file_post_init(void) { WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys, dfl_files)); WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, legacy_files)); } void __init hugetlb_cgroup_file_init(void) { struct hstate *h; __hugetlb_cgroup_file_pre_init(); for_each_hstate(h) __hugetlb_cgroup_file_init(h); __hugetlb_cgroup_file_post_init(); } /* * hugetlb_lock will make sure a parallel cgroup rmdir won't happen * when we migrate hugepages */ void hugetlb_cgroup_migrate(struct folio *old_folio, struct folio *new_folio) { struct hugetlb_cgroup *h_cg; struct hugetlb_cgroup *h_cg_rsvd; struct hstate *h = folio_hstate(old_folio); if (hugetlb_cgroup_disabled()) return; spin_lock_irq(&hugetlb_lock); h_cg = hugetlb_cgroup_from_folio(old_folio); h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio); set_hugetlb_cgroup(old_folio, NULL); set_hugetlb_cgroup_rsvd(old_folio, NULL); /* move the h_cg details to new cgroup */ set_hugetlb_cgroup(new_folio, h_cg); set_hugetlb_cgroup_rsvd(new_folio, h_cg_rsvd); list_move(&new_folio->lru, &h->hugepage_activelist); spin_unlock_irq(&hugetlb_lock); } static struct cftype hugetlb_files[] = { {} /* terminate */ }; struct cgroup_subsys hugetlb_cgrp_subsys = { .css_alloc = hugetlb_cgroup_css_alloc, .css_offline = hugetlb_cgroup_css_offline, .css_free = hugetlb_cgroup_css_free, .dfl_cftypes = hugetlb_files, .legacy_cftypes = hugetlb_files, };
50 50 50 50 50 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 // SPDX-License-Identifier: GPL-2.0-or-later /* RxRPC virtual connection handler, common bits. * * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/slab.h> #include <linux/net.h> #include <linux/skbuff.h> #include "ar-internal.h" /* * Time till a connection expires after last use (in seconds). */ unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60; unsigned int __read_mostly rxrpc_closed_conn_expiry = 10; static void rxrpc_clean_up_connection(struct work_struct *work); static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, unsigned long reap_at); void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { struct rxrpc_local *local = conn->local; bool busy; if (WARN_ON_ONCE(!local)) return; spin_lock_irq(&local->lock); busy = !list_empty(&conn->attend_link); if (!busy) { rxrpc_get_connection(conn, why); list_add_tail(&conn->attend_link, &local->conn_attend_q); } spin_unlock_irq(&local->lock); rxrpc_wake_up_io_thread(local); } static void rxrpc_connection_timer(struct timer_list *timer) { struct rxrpc_connection *conn = container_of(timer, struct rxrpc_connection, timer); rxrpc_poke_conn(conn, rxrpc_conn_get_poke_timer); } /* * allocate a new connection */ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, gfp_t gfp) { struct rxrpc_connection *conn; _enter(""); conn = kzalloc(sizeof(struct rxrpc_connection), gfp); if (conn) { INIT_LIST_HEAD(&conn->cache_link); timer_setup(&conn->timer, &rxrpc_connection_timer, 0); INIT_WORK(&conn->processor, rxrpc_process_connection); INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); INIT_LIST_HEAD(&conn->attend_link); mutex_init(&conn->security_lock); mutex_init(&conn->tx_data_alloc_lock); skb_queue_head_init(&conn->rx_queue); conn->rxnet = rxnet; conn->security = &rxrpc_no_security; rwlock_init(&conn->security_use_lock); spin_lock_init(&conn->state_lock); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); conn->idle_timestamp = jiffies; } _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0); return conn; } /* * Look up a connection in the cache by protocol parameters. * * If successful, a pointer to the connection is returned, but no ref is taken. * NULL is returned if there is no match. * * When searching for a service call, if we find a peer but no connection, we * return that through *_peer in case we need to create a new service call. * * The caller must be holding the RCU read lock. */ struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, struct sk_buff *skb) { struct rxrpc_connection *conn; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_peer *peer; _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK); /* Look up client connections by connection ID alone as their * IDs are unique for this machine. */ conn = idr_find(&local->conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT); if (!conn || refcount_read(&conn->ref) == 0) { _debug("no conn"); goto not_found; } if (conn->proto.epoch != sp->hdr.epoch || conn->local != local) goto not_found; peer = conn->peer; switch (srx->transport.family) { case AF_INET: if (peer->srx.transport.sin.sin_port != srx->transport.sin.sin_port) goto not_found; break; #ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: if (peer->srx.transport.sin6.sin6_port != srx->transport.sin6.sin6_port) goto not_found; break; #endif default: BUG(); } _leave(" = %p", conn); return conn; not_found: _leave(" = NULL"); return NULL; } /* * Disconnect a call and clear any channel it occupies when that call * terminates. The caller must hold the channel_lock and must release the * call's ref on the connection. */ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, struct rxrpc_call *call) { struct rxrpc_channel *chan = &conn->channels[call->cid & RXRPC_CHANNELMASK]; _enter("%d,%x", conn->debug_id, call->cid); if (chan->call == call) { /* Save the result of the call so that we can repeat it if necessary * through the channel, whilst disposing of the actual call record. */ trace_rxrpc_disconnect_call(call); switch (call->completion) { case RXRPC_CALL_SUCCEEDED: chan->last_seq = call->rx_highest_seq; chan->last_type = RXRPC_PACKET_TYPE_ACK; break; case RXRPC_CALL_LOCALLY_ABORTED: chan->last_abort = call->abort_code; chan->last_type = RXRPC_PACKET_TYPE_ABORT; break; default: chan->last_abort = RX_CALL_DEAD; chan->last_type = RXRPC_PACKET_TYPE_ABORT; break; } chan->last_call = chan->call_id; chan->call_id = chan->call_counter; chan->call = NULL; } _leave(""); } /* * Disconnect a call and clear any channel it occupies when that call * terminates. */ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); rxrpc_see_call(call, rxrpc_call_see_disconnected); call->peer->cong_ssthresh = call->cong_ssthresh; if (!hlist_unhashed(&call->error_link)) { spin_lock_irq(&call->peer->lock); hlist_del_init(&call->error_link); spin_unlock_irq(&call->peer->lock); } if (rxrpc_is_client_call(call)) { rxrpc_disconnect_client_call(call->bundle, call); } else { __rxrpc_disconnect_call(conn, call); conn->idle_timestamp = jiffies; if (atomic_dec_and_test(&conn->active)) rxrpc_set_service_reap_timer(conn->rxnet, jiffies + rxrpc_connection_expiry * HZ); } rxrpc_put_call(call, rxrpc_call_put_io_thread); } /* * Queue a connection's work processor, getting a ref to pass to the work * queue. */ void rxrpc_queue_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { if (atomic_read(&conn->active) >= 0 && rxrpc_queue_work(&conn->processor)) rxrpc_see_connection(conn, why); } /* * Note the re-emergence of a connection. */ void rxrpc_see_connection(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { if (conn) { int r = refcount_read(&conn->ref); trace_rxrpc_conn(conn->debug_id, r, why); } } /* * Get a ref on a connection. */ struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { int r; __refcount_inc(&conn->ref, &r); trace_rxrpc_conn(conn->debug_id, r + 1, why); return conn; } /* * Try to get a ref on a connection. */ struct rxrpc_connection * rxrpc_get_connection_maybe(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { int r; if (conn) { if (__refcount_inc_not_zero(&conn->ref, &r)) trace_rxrpc_conn(conn->debug_id, r + 1, why); else conn = NULL; } return conn; } /* * Set the service connection reap timer. */ static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, unsigned long reap_at) { if (rxnet->live) timer_reduce(&rxnet->service_conn_reap_timer, reap_at); } /* * destroy a virtual connection */ static void rxrpc_rcu_free_connection(struct rcu_head *rcu) { struct rxrpc_connection *conn = container_of(rcu, struct rxrpc_connection, rcu); struct rxrpc_net *rxnet = conn->rxnet; _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref)); trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref), rxrpc_conn_free); kfree(conn); if (atomic_dec_and_test(&rxnet->nr_conns)) wake_up_var(&rxnet->nr_conns); } /* * Clean up a dead connection. */ static void rxrpc_clean_up_connection(struct work_struct *work) { struct rxrpc_connection *conn = container_of(work, struct rxrpc_connection, destructor); struct rxrpc_net *rxnet = conn->rxnet; ASSERT(!conn->channels[0].call && !conn->channels[1].call && !conn->channels[2].call && !conn->channels[3].call); ASSERT(list_empty(&conn->cache_link)); timer_delete_sync(&conn->timer); cancel_work_sync(&conn->processor); /* Processing may restart the timer */ timer_delete_sync(&conn->timer); write_lock(&rxnet->conn_lock); list_del_init(&conn->proc_link); write_unlock(&rxnet->conn_lock); if (conn->pmtud_probe) { trace_rxrpc_pmtud_lost(conn, 0); conn->peer->pmtud_probing = false; conn->peer->pmtud_pending = true; } rxrpc_purge_queue(&conn->rx_queue); rxrpc_free_skb(conn->tx_response, rxrpc_skb_put_response); rxrpc_kill_client_conn(conn); conn->security->clear(conn); key_put(conn->key); rxrpc_put_bundle(conn->bundle, rxrpc_bundle_put_conn); rxrpc_put_peer(conn->peer, rxrpc_peer_put_conn); rxrpc_put_local(conn->local, rxrpc_local_put_kill_conn); /* Drain the Rx queue. Note that even though we've unpublished, an * incoming packet could still be being added to our Rx queue, so we * will need to drain it again in the RCU cleanup handler. */ rxrpc_purge_queue(&conn->rx_queue); page_frag_cache_drain(&conn->tx_data_alloc); call_rcu(&conn->rcu, rxrpc_rcu_free_connection); } /* * Drop a ref on a connection. */ void rxrpc_put_connection(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { unsigned int debug_id; bool dead; int r; if (!conn) return; debug_id = conn->debug_id; dead = __refcount_dec_and_test(&conn->ref, &r); trace_rxrpc_conn(debug_id, r - 1, why); if (dead) { timer_delete(&conn->timer); cancel_work(&conn->processor); if (in_softirq() || work_busy(&conn->processor) || timer_pending(&conn->timer)) /* Can't use the rxrpc workqueue as we need to cancel/flush * something that may be running/waiting there. */ schedule_work(&conn->destructor); else rxrpc_clean_up_connection(&conn->destructor); } } /* * reap dead service connections */ void rxrpc_service_connection_reaper(struct work_struct *work) { struct rxrpc_connection *conn, *_p; struct rxrpc_net *rxnet = container_of(work, struct rxrpc_net, service_conn_reaper); unsigned long expire_at, earliest, idle_timestamp, now; int active; LIST_HEAD(graveyard); _enter(""); now = jiffies; earliest = now + MAX_JIFFY_OFFSET; write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { ASSERTCMP(atomic_read(&conn->active), >=, 0); if (likely(atomic_read(&conn->active) > 0)) continue; if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; if (rxnet->live && !conn->local->dead) { idle_timestamp = READ_ONCE(conn->idle_timestamp); expire_at = idle_timestamp + rxrpc_connection_expiry * HZ; if (conn->local->service_closed) expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; _debug("reap CONN %d { a=%d,t=%ld }", conn->debug_id, atomic_read(&conn->active), (long)expire_at - (long)now); if (time_before(now, expire_at)) { if (time_before(expire_at, earliest)) earliest = expire_at; continue; } } /* The activity count sits at 0 whilst the conn is unused on * the list; we reduce that to -1 to make the conn unavailable. */ active = 0; if (!atomic_try_cmpxchg(&conn->active, &active, -1)) continue; rxrpc_see_connection(conn, rxrpc_conn_see_reap_service); if (rxrpc_conn_is_client(conn)) BUG(); else rxrpc_unpublish_service_conn(conn); list_move_tail(&conn->link, &graveyard); } write_unlock(&rxnet->conn_lock); if (earliest != now + MAX_JIFFY_OFFSET) { _debug("reschedule reaper %ld", (long)earliest - (long)now); ASSERT(time_after(earliest, now)); rxrpc_set_service_reap_timer(rxnet, earliest); } while (!list_empty(&graveyard)) { conn = list_entry(graveyard.next, struct rxrpc_connection, link); list_del_init(&conn->link); ASSERTCMP(atomic_read(&conn->active), ==, -1); rxrpc_put_connection(conn, rxrpc_conn_put_service_reaped); } _leave(""); } /* * preemptively destroy all the service connection records rather than * waiting for them to time out */ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) { struct rxrpc_connection *conn, *_p; bool leak = false; _enter(""); atomic_dec(&rxnet->nr_conns); timer_delete_sync(&rxnet->service_conn_reap_timer); rxrpc_queue_work(&rxnet->service_conn_reaper); flush_workqueue(rxrpc_workqueue); write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { pr_err("AF_RXRPC: Leaked conn %p {%d}\n", conn, refcount_read(&conn->ref)); leak = true; } write_unlock(&rxnet->conn_lock); BUG_ON(leak); ASSERT(list_empty(&rxnet->conn_proc_list)); /* We need to wait for the connections to be destroyed by RCU as they * pin things that we still need to get rid of. */ wait_var_event(&rxnet->nr_conns, !atomic_read(&rxnet->nr_conns)); _leave(""); }
12101 8178 54 3 439 12101 22 3 157 724 3226 2909 4 4290 6361 3 5370 4110 37 93 225 4239 598 3 3 33 1431 203 3991 3279 236 871 98 88 7 54 7 54 250 109 2630 98 107 75 18 798 126 74 126 109 795 49 4095 271 29 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 /* SPDX-License-Identifier: GPL-2.0 */ /* * Macros for manipulating and testing page->flags */ #ifndef PAGE_FLAGS_H #define PAGE_FLAGS_H #include <linux/types.h> #include <linux/bug.h> #include <linux/mmdebug.h> #ifndef __GENERATING_BOUNDS_H #include <linux/mm_types.h> #include <generated/bounds.h> #endif /* !__GENERATING_BOUNDS_H */ /* * Various page->flags bits: * * PG_reserved is set for special pages. The "struct page" of such a page * should in general not be touched (e.g. set dirty) except by its owner. * Pages marked as PG_reserved include: * - Pages part of the kernel image (including vDSO) and similar (e.g. BIOS, * initrd, HW tables) * - Pages reserved or allocated early during boot (before the page allocator * was initialized). This includes (depending on the architecture) the * initial vmemmap, initial page tables, crashkernel, elfcorehdr, and much * much more. Once (if ever) freed, PG_reserved is cleared and they will * be given to the page allocator. * - Pages falling into physical memory gaps - not IORESOURCE_SYSRAM. Trying * to read/write these pages might end badly. Don't touch! * - The zero page(s) * - Pages allocated in the context of kexec/kdump (loaded kernel image, * control pages, vmcoreinfo) * - MMIO/DMA pages. Some architectures don't allow to ioremap pages that are * not marked PG_reserved (as they might be in use by somebody else who does * not respect the caching strategy). * - MCA pages on ia64 * - Pages holding CPU notes for POWER Firmware Assisted Dump * - Device memory (e.g. PMEM, DAX, HMM) * Some PG_reserved pages will be excluded from the hibernation image. * PG_reserved does in general not hinder anybody from dumping or swapping * and is no longer required for remap_pfn_range(). ioremap might require it. * Consequently, PG_reserved for a page mapped into user space can indicate * the zero page, the vDSO, MMIO pages or device memory. * * The PG_private bitflag is set on pagecache pages if they contain filesystem * specific data (which is normally at page->private). It can be used by * private allocations for its own usage. * * During initiation of disk I/O, PG_locked is set. This bit is set before I/O * and cleared when writeback _starts_ or when read _completes_. PG_writeback * is set before writeback starts and cleared when it finishes. * * PG_locked also pins a page in pagecache, and blocks truncation of the file * while it is held. * * page_waitqueue(page) is a wait queue of all tasks waiting for the page * to become unlocked. * * PG_swapbacked is set when a page uses swap as a backing storage. This are * usually PageAnon or shmem pages but please note that even anonymous pages * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as * a result of MADV_FREE). * * PG_referenced, PG_reclaim are used for page reclaim for anonymous and * file-backed pagecache (see mm/vmscan.c). * * PG_arch_1 is an architecture specific page state bit. The generic code * guarantees that this bit is cleared for a page when it first is entered into * the page cache. * * PG_hwpoison indicates that a page got corrupted in hardware and contains * data with incorrect ECC bits that triggered a machine check. Accessing is * not safe since it may cause another machine check. Don't touch! */ /* * Don't use the pageflags directly. Use the PageFoo macros. * * The page flags field is split into two parts, the main flags area * which extends from the low bits upwards, and the fields area which * extends from the high bits downwards. * * | FIELD | ... | FLAGS | * N-1 ^ 0 * (NR_PAGEFLAGS) * * The fields area is reserved for fields mapping zone, node (for NUMA) and * SPARSEMEM section (for variants of SPARSEMEM that require section ids like * SPARSEMEM_EXTREME with !SPARSEMEM_VMEMMAP). */ enum pageflags { PG_locked, /* Page is locked. Don't touch. */ PG_writeback, /* Page is under writeback */ PG_referenced, PG_uptodate, PG_dirty, PG_lru, PG_head, /* Must be in bit 6 */ PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_active, PG_workingset, PG_owner_priv_1, /* Owner use. If pagecache, fs may use */ PG_owner_2, /* Owner use. If pagecache, fs may use */ PG_arch_1, PG_reserved, PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ PG_unevictable, /* Page is "unevictable" */ PG_dropbehind, /* drop pages on IO completion */ #ifdef CONFIG_MMU PG_mlocked, /* Page is vma mlocked */ #endif #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. Don't touch */ #endif #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) PG_young, PG_idle, #endif #ifdef CONFIG_ARCH_USES_PG_ARCH_2 PG_arch_2, #endif #ifdef CONFIG_ARCH_USES_PG_ARCH_3 PG_arch_3, #endif __NR_PAGEFLAGS, PG_readahead = PG_reclaim, /* Anonymous memory (and shmem) */ PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */ /* Some filesystems */ PG_checked = PG_owner_priv_1, /* * Depending on the way an anonymous folio can be mapped into a page * table (e.g., single PMD/PUD/CONT of the head page vs. PTE-mapped * THP), PG_anon_exclusive may be set only for the head page or for * tail pages of an anonymous folio. For now, we only expect it to be * set on tail pages for PTE-mapped THP. */ PG_anon_exclusive = PG_owner_2, /* * Set if all buffer heads in the folio are mapped. * Filesystems which do not use BHs can use it for their own purpose. */ PG_mappedtodisk = PG_owner_2, /* Two page bits are conscripted by FS-Cache to maintain local caching * state. These bits are set on pages belonging to the netfs's inodes * when those inodes are being locally cached. */ PG_fscache = PG_private_2, /* page backed by cache */ /* XEN */ /* Pinned in Xen as a read-only pagetable page. */ PG_pinned = PG_owner_priv_1, /* Pinned as part of domain save (see xen_mm_pin_all()). */ PG_savepinned = PG_dirty, /* Has a grant mapping of another (foreign) domain's page. */ PG_foreign = PG_owner_priv_1, /* Remapped by swiotlb-xen. */ PG_xen_remapped = PG_owner_priv_1, #ifdef CONFIG_MIGRATION /* movable_ops page that is isolated for migration */ PG_movable_ops_isolated = PG_reclaim, /* this is a movable_ops page (for selected typed pages only) */ PG_movable_ops = PG_uptodate, #endif /* Only valid for buddy pages. Used to track pages that are reported */ PG_reported = PG_uptodate, #ifdef CONFIG_MEMORY_HOTPLUG /* For self-hosted memmap pages */ PG_vmemmap_self_hosted = PG_owner_priv_1, #endif /* * Flags only valid for compound pages. Stored in first tail page's * flags word. Cannot use the first 8 flags or any flag marked as * PF_ANY. */ /* At least one page in this folio has the hwpoison flag set */ PG_has_hwpoisoned = PG_active, PG_large_rmappable = PG_workingset, /* anon or file-backed */ PG_partially_mapped = PG_reclaim, /* was identified to be partially mapped */ }; #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) #ifndef __GENERATING_BOUNDS_H #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key); /* * Return the real head page struct iff the @page is a fake head page, otherwise * return the @page itself. See Documentation/mm/vmemmap_dedup.rst. */ static __always_inline const struct page *page_fixed_fake_head(const struct page *page) { if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key)) return page; /* * Only addresses aligned with PAGE_SIZE of struct page may be fake head * struct page. The alignment check aims to avoid access the fields ( * e.g. compound_head) of the @page[1]. It can avoid touch a (possibly) * cold cacheline in some cases. */ if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) && test_bit(PG_head, &page->flags.f)) { /* * We can safely access the field of the @page[1] with PG_head * because the @page is a compound page composed with at least * two contiguous pages. */ unsigned long head = READ_ONCE(page[1].compound_head); if (likely(head & 1)) return (const struct page *)(head - 1); } return page; } static __always_inline bool page_count_writable(const struct page *page, int u) { if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key)) return true; /* * The refcount check is ordered before the fake-head check to prevent * the following race: * CPU 1 (HVO) CPU 2 (speculative PFN walker) * * page_ref_freeze() * synchronize_rcu() * rcu_read_lock() * page_is_fake_head() is false * vmemmap_remap_pte() * XXX: struct page[] becomes r/o * * page_ref_unfreeze() * page_ref_count() is not zero * * atomic_add_unless(&page->_refcount) * XXX: try to modify r/o struct page[] * * The refcount check also prevents modification attempts to other (r/o) * tail pages that are not fake heads. */ if (atomic_read_acquire(&page->_refcount) == u) return false; return page_fixed_fake_head(page) == page; } #else static inline const struct page *page_fixed_fake_head(const struct page *page) { return page; } static inline bool page_count_writable(const struct page *page, int u) { return true; } #endif static __always_inline int page_is_fake_head(const struct page *page) { return page_fixed_fake_head(page) != page; } static __always_inline unsigned long _compound_head(const struct page *page) { unsigned long head = READ_ONCE(page->compound_head); if (unlikely(head & 1)) return head - 1; return (unsigned long)page_fixed_fake_head(page); } #define compound_head(page) ((typeof(page))_compound_head(page)) /** * page_folio - Converts from page to folio. * @p: The page. * * Every page is part of a folio. This function cannot be called on a * NULL pointer. * * Context: No reference, nor lock is required on @page. If the caller * does not hold a reference, this call may race with a folio split, so * it should re-check the folio still contains this page after gaining * a reference on the folio. * Return: The folio which contains this page. */ #define page_folio(p) (_Generic((p), \ const struct page *: (const struct folio *)_compound_head(p), \ struct page *: (struct folio *)_compound_head(p))) /** * folio_page - Return a page from a folio. * @folio: The folio. * @n: The page number to return. * * @n is relative to the start of the folio. This function does not * check that the page number lies within @folio; the caller is presumed * to have a reference to the page. */ #define folio_page(folio, n) (&(folio)->page + (n)) static __always_inline int PageTail(const struct page *page) { return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page); } static __always_inline int PageCompound(const struct page *page) { return test_bit(PG_head, &page->flags.f) || READ_ONCE(page->compound_head) & 1; } #define PAGE_POISON_PATTERN -1l static inline int PagePoisoned(const struct page *page) { return READ_ONCE(page->flags.f) == PAGE_POISON_PATTERN; } #ifdef CONFIG_DEBUG_VM void page_init_poison(struct page *page, size_t size); #else static inline void page_init_poison(struct page *page, size_t size) { } #endif static const unsigned long *const_folio_flags(const struct folio *folio, unsigned n) { const struct page *page = &folio->page; VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page); return &page[n].flags.f; } static unsigned long *folio_flags(struct folio *folio, unsigned n) { struct page *page = &folio->page; VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page); return &page[n].flags.f; } /* * Page flags policies wrt compound pages * * PF_POISONED_CHECK * check if this struct page poisoned/uninitialized * * PF_ANY: * the page flag is relevant for small, head and tail pages. * * PF_HEAD: * for compound page all operations related to the page flag applied to * head page. * * PF_NO_TAIL: * modifications of the page flag must be done on small or head pages, * checks can be done on tail pages too. * * PF_NO_COMPOUND: * the page flag is not relevant for compound pages. * * PF_SECOND: * the page flag is stored in the first tail page. */ #define PF_POISONED_CHECK(page) ({ \ VM_BUG_ON_PGFLAGS(PagePoisoned(page), page); \ page; }) #define PF_ANY(page, enforce) PF_POISONED_CHECK(page) #define PF_HEAD(page, enforce) PF_POISONED_CHECK(compound_head(page)) #define PF_NO_TAIL(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ PF_POISONED_CHECK(compound_head(page)); }) #define PF_NO_COMPOUND(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \ PF_POISONED_CHECK(page); }) #define PF_SECOND(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(!PageHead(page), page); \ PF_POISONED_CHECK(&page[1]); }) /* Which page is the flag stored in */ #define FOLIO_PF_ANY 0 #define FOLIO_PF_HEAD 0 #define FOLIO_PF_NO_TAIL 0 #define FOLIO_PF_NO_COMPOUND 0 #define FOLIO_PF_SECOND 1 #define FOLIO_HEAD_PAGE 0 #define FOLIO_SECOND_PAGE 1 /* * Macros to create function definitions for page flags */ #define FOLIO_TEST_FLAG(name, page) \ static __always_inline bool folio_test_##name(const struct folio *folio) \ { return test_bit(PG_##name, const_folio_flags(folio, page)); } #define FOLIO_SET_FLAG(name, page) \ static __always_inline void folio_set_##name(struct folio *folio) \ { set_bit(PG_##name, folio_flags(folio, page)); } #define FOLIO_CLEAR_FLAG(name, page) \ static __always_inline void folio_clear_##name(struct folio *folio) \ { clear_bit(PG_##name, folio_flags(folio, page)); } #define __FOLIO_SET_FLAG(name, page) \ static __always_inline void __folio_set_##name(struct folio *folio) \ { __set_bit(PG_##name, folio_flags(folio, page)); } #define __FOLIO_CLEAR_FLAG(name, page) \ static __always_inline void __folio_clear_##name(struct folio *folio) \ { __clear_bit(PG_##name, folio_flags(folio, page)); } #define FOLIO_TEST_SET_FLAG(name, page) \ static __always_inline bool folio_test_set_##name(struct folio *folio) \ { return test_and_set_bit(PG_##name, folio_flags(folio, page)); } #define FOLIO_TEST_CLEAR_FLAG(name, page) \ static __always_inline bool folio_test_clear_##name(struct folio *folio) \ { return test_and_clear_bit(PG_##name, folio_flags(folio, page)); } #define FOLIO_FLAG(name, page) \ FOLIO_TEST_FLAG(name, page) \ FOLIO_SET_FLAG(name, page) \ FOLIO_CLEAR_FLAG(name, page) #define TESTPAGEFLAG(uname, lname, policy) \ FOLIO_TEST_FLAG(lname, FOLIO_##policy) \ static __always_inline int Page##uname(const struct page *page) \ { return test_bit(PG_##lname, &policy(page, 0)->flags.f); } #define SETPAGEFLAG(uname, lname, policy) \ FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void SetPage##uname(struct page *page) \ { set_bit(PG_##lname, &policy(page, 1)->flags.f); } #define CLEARPAGEFLAG(uname, lname, policy) \ FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void ClearPage##uname(struct page *page) \ { clear_bit(PG_##lname, &policy(page, 1)->flags.f); } #define __SETPAGEFLAG(uname, lname, policy) \ __FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void __SetPage##uname(struct page *page) \ { __set_bit(PG_##lname, &policy(page, 1)->flags.f); } #define __CLEARPAGEFLAG(uname, lname, policy) \ __FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void __ClearPage##uname(struct page *page) \ { __clear_bit(PG_##lname, &policy(page, 1)->flags.f); } #define TESTSETFLAG(uname, lname, policy) \ FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestSetPage##uname(struct page *page) \ { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags.f); } #define TESTCLEARFLAG(uname, lname, policy) \ FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestClearPage##uname(struct page *page) \ { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags.f); } #define PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ SETPAGEFLAG(uname, lname, policy) \ CLEARPAGEFLAG(uname, lname, policy) #define __PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ __SETPAGEFLAG(uname, lname, policy) \ __CLEARPAGEFLAG(uname, lname, policy) #define TESTSCFLAG(uname, lname, policy) \ TESTSETFLAG(uname, lname, policy) \ TESTCLEARFLAG(uname, lname, policy) #define FOLIO_TEST_FLAG_FALSE(name) \ static inline bool folio_test_##name(const struct folio *folio) \ { return false; } #define FOLIO_SET_FLAG_NOOP(name) \ static inline void folio_set_##name(struct folio *folio) { } #define FOLIO_CLEAR_FLAG_NOOP(name) \ static inline void folio_clear_##name(struct folio *folio) { } #define __FOLIO_SET_FLAG_NOOP(name) \ static inline void __folio_set_##name(struct folio *folio) { } #define __FOLIO_CLEAR_FLAG_NOOP(name) \ static inline void __folio_clear_##name(struct folio *folio) { } #define FOLIO_TEST_SET_FLAG_FALSE(name) \ static inline bool folio_test_set_##name(struct folio *folio) \ { return false; } #define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \ static inline bool folio_test_clear_##name(struct folio *folio) \ { return false; } #define FOLIO_FLAG_FALSE(name) \ FOLIO_TEST_FLAG_FALSE(name) \ FOLIO_SET_FLAG_NOOP(name) \ FOLIO_CLEAR_FLAG_NOOP(name) #define TESTPAGEFLAG_FALSE(uname, lname) \ FOLIO_TEST_FLAG_FALSE(lname) \ static inline int Page##uname(const struct page *page) { return 0; } #define SETPAGEFLAG_NOOP(uname, lname) \ FOLIO_SET_FLAG_NOOP(lname) \ static inline void SetPage##uname(struct page *page) { } #define CLEARPAGEFLAG_NOOP(uname, lname) \ FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void ClearPage##uname(struct page *page) { } #define __CLEARPAGEFLAG_NOOP(uname, lname) \ __FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void __ClearPage##uname(struct page *page) { } #define TESTSETFLAG_FALSE(uname, lname) \ FOLIO_TEST_SET_FLAG_FALSE(lname) \ static inline int TestSetPage##uname(struct page *page) { return 0; } #define TESTCLEARFLAG_FALSE(uname, lname) \ FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \ static inline int TestClearPage##uname(struct page *page) { return 0; } #define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname) #define TESTSCFLAG_FALSE(uname, lname) \ TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE) FOLIO_FLAG(referenced, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(referenced, FOLIO_HEAD_PAGE) __FOLIO_SET_FLAG(referenced, FOLIO_HEAD_PAGE) PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD) __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD) PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD) TESTCLEARFLAG(LRU, lru, PF_HEAD) FOLIO_FLAG(active, FOLIO_HEAD_PAGE) __FOLIO_CLEAR_FLAG(active, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(active, FOLIO_HEAD_PAGE) PAGEFLAG(Workingset, workingset, PF_HEAD) TESTCLEARFLAG(Workingset, workingset, PF_HEAD) PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */ /* Xen */ PAGEFLAG(Pinned, pinned, PF_NO_COMPOUND) TESTSCFLAG(Pinned, pinned, PF_NO_COMPOUND) PAGEFLAG(SavePinned, savepinned, PF_NO_COMPOUND); PAGEFLAG(Foreign, foreign, PF_NO_COMPOUND); PAGEFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND) TESTCLEARFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND) PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __SETPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE) __FOLIO_CLEAR_FLAG(swapbacked, FOLIO_HEAD_PAGE) __FOLIO_SET_FLAG(swapbacked, FOLIO_HEAD_PAGE) /* * Private page markings that may be used by the filesystem that owns the page * for its own purposes. * - PG_private and PG_private_2 cause release_folio() and co to be invoked */ PAGEFLAG(Private, private, PF_ANY) FOLIO_FLAG(private_2, FOLIO_HEAD_PAGE) /* owner_2 can be set on tail pages for anon memory */ FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE) /* * Only test-and-set exist for PG_writeback. The unconditional operators are * risky: they bypass page accounting. */ TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL) TESTSCFLAG(Writeback, writeback, PF_NO_TAIL) FOLIO_FLAG(mappedtodisk, FOLIO_HEAD_PAGE) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL) FOLIO_FLAG(readahead, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(readahead, FOLIO_HEAD_PAGE) FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(dropbehind, FOLIO_HEAD_PAGE) __FOLIO_SET_FLAG(dropbehind, FOLIO_HEAD_PAGE) #ifdef CONFIG_HIGHMEM /* * Must use a macro here due to header dependency issues. page_zone() is not * available at this point. */ #define PageHighMem(__p) is_highmem_idx(page_zonenum(__p)) #define folio_test_highmem(__f) is_highmem_idx(folio_zonenum(__f)) #else PAGEFLAG_FALSE(HighMem, highmem) #endif #define PhysHighMem(__p) (PageHighMem(phys_to_page(__p))) /* Does kmap_local_folio() only allow access to one page of the folio? */ #ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP #define folio_test_partial_kmap(f) true #else #define folio_test_partial_kmap(f) folio_test_highmem(f) #endif #ifdef CONFIG_SWAP static __always_inline bool folio_test_swapcache(const struct folio *folio) { return folio_test_swapbacked(folio) && test_bit(PG_swapcache, const_folio_flags(folio, 0)); } FOLIO_SET_FLAG(swapcache, FOLIO_HEAD_PAGE) FOLIO_CLEAR_FLAG(swapcache, FOLIO_HEAD_PAGE) #else FOLIO_FLAG_FALSE(swapcache) #endif FOLIO_FLAG(unevictable, FOLIO_HEAD_PAGE) __FOLIO_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE) #ifdef CONFIG_MMU FOLIO_FLAG(mlocked, FOLIO_HEAD_PAGE) __FOLIO_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE) FOLIO_TEST_SET_FLAG(mlocked, FOLIO_HEAD_PAGE) #else FOLIO_FLAG_FALSE(mlocked) __FOLIO_CLEAR_FLAG_NOOP(mlocked) FOLIO_TEST_CLEAR_FLAG_FALSE(mlocked) FOLIO_TEST_SET_FLAG_FALSE(mlocked) #endif #ifdef CONFIG_MEMORY_FAILURE PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) #else PAGEFLAG_FALSE(HWPoison, hwpoison) #define __PG_HWPOISON 0 #endif #ifdef CONFIG_PAGE_IDLE_FLAG #ifdef CONFIG_64BIT FOLIO_TEST_FLAG(young, FOLIO_HEAD_PAGE) FOLIO_SET_FLAG(young, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(young, FOLIO_HEAD_PAGE) FOLIO_FLAG(idle, FOLIO_HEAD_PAGE) #endif /* See page_idle.h for !64BIT workaround */ #else /* !CONFIG_PAGE_IDLE_FLAG */ FOLIO_FLAG_FALSE(young) FOLIO_TEST_CLEAR_FLAG_FALSE(young) FOLIO_FLAG_FALSE(idle) #endif /* * PageReported() is used to track reported free pages within the Buddy * allocator. We can use the non-atomic version of the test and set * operations as both should be shielded with the zone lock to prevent * any possible races on the setting or clearing of the bit. */ __PAGEFLAG(Reported, reported, PF_NO_COMPOUND) #ifdef CONFIG_MEMORY_HOTPLUG PAGEFLAG(VmemmapSelfHosted, vmemmap_self_hosted, PF_ANY) #else PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) #endif /* * On an anonymous folio mapped into a user virtual memory area, * folio->mapping points to its anon_vma, not to a struct address_space; * with the FOLIO_MAPPING_ANON bit set to distinguish it. See rmap.h. * * On an anonymous folio in a VM_MERGEABLE area, if CONFIG_KSM is enabled, * the FOLIO_MAPPING_ANON_KSM bit may be set along with the FOLIO_MAPPING_ANON * bit; and then folio->mapping points, not to an anon_vma, but to a private * structure which KSM associates with that merged folio. See ksm.h. * * Please note that, confusingly, "folio_mapping" refers to the inode * address_space which maps the folio from disk; whereas "folio_mapped" * refers to user virtual address space into which the folio is mapped. * * For slab pages, since slab reuses the bits in struct page to store its * internal states, the folio->mapping does not exist as such, nor do * these flags below. So in order to avoid testing non-existent bits, * please make sure that folio_test_slab(folio) actually evaluates to * false before calling the following functions (e.g., folio_test_anon). * See mm/slab.h. */ #define FOLIO_MAPPING_ANON 0x1 #define FOLIO_MAPPING_ANON_KSM 0x2 #define FOLIO_MAPPING_KSM (FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM) #define FOLIO_MAPPING_FLAGS (FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM) static __always_inline bool folio_test_anon(const struct folio *folio) { return ((unsigned long)folio->mapping & FOLIO_MAPPING_ANON) != 0; } static __always_inline bool PageAnonNotKsm(const struct page *page) { unsigned long flags = (unsigned long)page_folio(page)->mapping; return (flags & FOLIO_MAPPING_FLAGS) == FOLIO_MAPPING_ANON; } static __always_inline bool PageAnon(const struct page *page) { return folio_test_anon(page_folio(page)); } #ifdef CONFIG_KSM /* * A KSM page is one of those write-protected "shared pages" or "merged pages" * which KSM maps into multiple mms, wherever identical anonymous page content * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any * anon_vma, but to that page's node of the stable tree. */ static __always_inline bool folio_test_ksm(const struct folio *folio) { return ((unsigned long)folio->mapping & FOLIO_MAPPING_FLAGS) == FOLIO_MAPPING_KSM; } #else FOLIO_TEST_FLAG_FALSE(ksm) #endif u64 stable_page_flags(const struct page *page); /** * folio_xor_flags_has_waiters - Change some folio flags. * @folio: The folio. * @mask: Bits set in this word will be changed. * * This must only be used for flags which are changed with the folio * lock held. For example, it is unsafe to use for PG_dirty as that * can be set without the folio lock held. It can also only be used * on flags which are in the range 0-6 as some of the implementations * only affect those bits. * * Return: Whether there are tasks waiting on the folio. */ static inline bool folio_xor_flags_has_waiters(struct folio *folio, unsigned long mask) { return xor_unlock_is_negative_byte(mask, folio_flags(folio, 0)); } /** * folio_test_uptodate - Is this folio up to date? * @folio: The folio. * * The uptodate flag is set on a folio when every byte in the folio is * at least as new as the corresponding bytes on storage. Anonymous * and CoW folios are always uptodate. If the folio is not uptodate, * some of the bytes in it may be; see the is_partially_uptodate() * address_space operation. */ static inline bool folio_test_uptodate(const struct folio *folio) { bool ret = test_bit(PG_uptodate, const_folio_flags(folio, 0)); /* * Must ensure that the data we read out of the folio is loaded * _after_ we've loaded folio->flags to check the uptodate bit. * We can skip the barrier if the folio is not uptodate, because * we wouldn't be reading anything from it. * * See folio_mark_uptodate() for the other side of the story. */ if (ret) smp_rmb(); return ret; } static inline bool PageUptodate(const struct page *page) { return folio_test_uptodate(page_folio(page)); } static __always_inline void __folio_mark_uptodate(struct folio *folio) { smp_wmb(); __set_bit(PG_uptodate, folio_flags(folio, 0)); } static __always_inline void folio_mark_uptodate(struct folio *folio) { /* * Memory barrier must be issued before setting the PG_uptodate bit, * so that all previous stores issued in order to bring the folio * uptodate are actually visible before folio_test_uptodate becomes true. */ smp_wmb(); set_bit(PG_uptodate, folio_flags(folio, 0)); } static __always_inline void __SetPageUptodate(struct page *page) { __folio_mark_uptodate((struct folio *)page); } static __always_inline void SetPageUptodate(struct page *page) { folio_mark_uptodate((struct folio *)page); } CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) void __folio_start_writeback(struct folio *folio, bool keep_write); void set_page_writeback(struct page *page); #define folio_start_writeback(folio) \ __folio_start_writeback(folio, false) static __always_inline bool folio_test_head(const struct folio *folio) { return test_bit(PG_head, const_folio_flags(folio, FOLIO_PF_ANY)); } static __always_inline int PageHead(const struct page *page) { PF_POISONED_CHECK(page); return test_bit(PG_head, &page->flags.f) && !page_is_fake_head(page); } __SETPAGEFLAG(Head, head, PF_ANY) __CLEARPAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) /** * folio_test_large() - Does this folio contain more than one page? * @folio: The folio to test. * * Return: True if the folio is larger than one page. */ static inline bool folio_test_large(const struct folio *folio) { return folio_test_head(folio); } static __always_inline void set_compound_head(struct page *page, struct page *head) { WRITE_ONCE(page->compound_head, (unsigned long)head + 1); } static __always_inline void clear_compound_head(struct page *page) { WRITE_ONCE(page->compound_head, 0); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void ClearPageCompound(struct page *page) { BUG_ON(!PageHead(page)); ClearPageHead(page); } FOLIO_FLAG(large_rmappable, FOLIO_SECOND_PAGE) FOLIO_FLAG(partially_mapped, FOLIO_SECOND_PAGE) #else FOLIO_FLAG_FALSE(large_rmappable) FOLIO_FLAG_FALSE(partially_mapped) #endif #define PG_head_mask ((1UL << PG_head)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageTransCompound returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. */ static inline int PageTransCompound(const struct page *page) { return PageCompound(page); } #else TESTPAGEFLAG_FALSE(TransCompound, transcompound) #endif #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) /* * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the * compound page. * * This flag is set by hwpoison handler. Cleared by THP split or free page. */ FOLIO_FLAG(has_hwpoisoned, FOLIO_SECOND_PAGE) #else FOLIO_FLAG_FALSE(has_hwpoisoned) #endif /* * For pages that do not use mapcount, page_type may be used. * The low 24 bits of pagetype may be used for your own purposes, as long * as you are careful to not affect the top 8 bits. The low bits of * pagetype will be overwritten when you clear the page_type from the page. */ enum pagetype { /* 0x00-0x7f are positive numbers, ie mapcount */ /* Reserve 0x80-0xef for mapcount overflow. */ PGTY_buddy = 0xf0, PGTY_offline = 0xf1, PGTY_table = 0xf2, PGTY_guard = 0xf3, PGTY_hugetlb = 0xf4, PGTY_slab = 0xf5, PGTY_zsmalloc = 0xf6, PGTY_unaccepted = 0xf7, PGTY_large_kmalloc = 0xf8, PGTY_mapcount_underflow = 0xff }; static inline bool page_type_has_type(int page_type) { return page_type < (PGTY_mapcount_underflow << 24); } /* This takes a mapcount which is one more than page->_mapcount */ static inline bool page_mapcount_is_type(unsigned int mapcount) { return page_type_has_type(mapcount - 1); } static inline bool page_has_type(const struct page *page) { return page_type_has_type(data_race(page->page_type)); } #define FOLIO_TYPE_OPS(lname, fname) \ static __always_inline bool folio_test_##fname(const struct folio *folio) \ { \ return data_race(folio->page.page_type >> 24) == PGTY_##lname; \ } \ static __always_inline void __folio_set_##fname(struct folio *folio) \ { \ if (folio_test_##fname(folio)) \ return; \ VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \ folio); \ folio->page.page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __folio_clear_##fname(struct folio *folio) \ { \ if (folio->page.page_type == UINT_MAX) \ return; \ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ folio->page.page_type = UINT_MAX; \ } #define PAGE_TYPE_OPS(uname, lname, fname) \ FOLIO_TYPE_OPS(lname, fname) \ static __always_inline int Page##uname(const struct page *page) \ { \ return data_race(page->page_type >> 24) == PGTY_##lname; \ } \ static __always_inline void __SetPage##uname(struct page *page) \ { \ if (Page##uname(page)) \ return; \ VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \ page->page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ if (page->page_type == UINT_MAX) \ return; \ VM_BUG_ON_PAGE(!Page##uname(page), page); \ page->page_type = UINT_MAX; \ } /* * PageBuddy() indicates that the page is free and in the buddy system * (see mm/page_alloc.c). */ PAGE_TYPE_OPS(Buddy, buddy, buddy) /* * PageOffline() indicates that the page is logically offline although the * containing section is online. (e.g. inflated in a balloon driver or * not onlined when onlining the section). * The content of these pages is effectively stale. Such pages should not * be touched (read/write/dump/save) except by their owner. * * When a memory block gets onlined, all pages are initialized with a * refcount of 1 and PageOffline(). generic_online_page() will * take care of clearing PageOffline(). * * If a driver wants to allow to offline unmovable PageOffline() pages without * putting them back to the buddy, it can do so via the memory notifier by * decrementing the reference count in MEM_GOING_OFFLINE and incrementing the * reference count in MEM_CANCEL_OFFLINE. When offlining, the PageOffline() * pages (now with a reference count of zero) are treated like free (unmanaged) * pages, allowing the containing memory block to get offlined. A driver that * relies on this feature is aware that re-onlining the memory block will * require not giving them to the buddy via generic_online_page(). * * Memory offlining code will not adjust the managed page count for any * PageOffline() pages, treating them like they were never exposed to the * buddy using generic_online_page(). * * There are drivers that mark a page PageOffline() and expect there won't be * any further access to page content. PFN walkers that read content of random * pages should check PageOffline() and synchronize with such drivers using * page_offline_freeze()/page_offline_thaw(). */ PAGE_TYPE_OPS(Offline, offline, offline) extern void page_offline_freeze(void); extern void page_offline_thaw(void); extern void page_offline_begin(void); extern void page_offline_end(void); /* * Marks pages in use as page tables. */ PAGE_TYPE_OPS(Table, table, pgtable) /* * Marks guardpages used with debug_pagealloc. */ PAGE_TYPE_OPS(Guard, guard, guard) FOLIO_TYPE_OPS(slab, slab) /** * PageSlab - Determine if the page belongs to the slab allocator * @page: The page to test. * * Context: Any context. * Return: True for slab pages, false for any other kind of page. */ static inline bool PageSlab(const struct page *page) { return folio_test_slab(page_folio(page)); } #ifdef CONFIG_HUGETLB_PAGE FOLIO_TYPE_OPS(hugetlb, hugetlb) #else FOLIO_TEST_FLAG_FALSE(hugetlb) #endif PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) /* * Mark pages that has to be accepted before touched for the first time. * * Serialized with zone lock. */ PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted) FOLIO_TYPE_OPS(large_kmalloc, large_kmalloc) /** * PageHuge - Determine if the page belongs to hugetlbfs * @page: The page to test. * * Context: Any context. * Return: True for hugetlbfs pages, false for anon pages or pages * belonging to other filesystems. */ static inline bool PageHuge(const struct page *page) { return folio_test_hugetlb(page_folio(page)); } /* * Check if a page is currently marked HWPoisoned. Note that this check is * best effort only and inherently racy: there is no way to synchronize with * failing hardware. */ static inline bool is_page_hwpoison(const struct page *page) { const struct folio *folio; if (PageHWPoison(page)) return true; folio = page_folio(page); return folio_test_hugetlb(folio) && PageHWPoison(&folio->page); } static inline bool folio_contain_hwpoisoned_page(struct folio *folio) { return folio_test_hwpoison(folio) || (folio_test_large(folio) && folio_test_has_hwpoisoned(folio)); } bool is_free_buddy_page(const struct page *page); #ifdef CONFIG_MIGRATION /* * This page is migratable through movable_ops (for selected typed pages * only). * * Page migration of such pages might fail, for example, if the page is * already isolated by somebody else, or if the page is about to get freed. * * While a subsystem might set selected typed pages that support page migration * as being movable through movable_ops, it must never clear this flag. * * This flag is only cleared when the page is freed back to the buddy. * * Only selected page types support this flag (see page_movable_ops()) and * the flag might be used in other context for other pages. Always use * page_has_movable_ops() instead. */ TESTPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL); SETPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL); /* * A movable_ops page has this flag set while it is isolated for migration. * This flag primarily protects against concurrent migration attempts. * * Once migration ended (success or failure), the flag is cleared. The * flag is managed by the migration core. */ PAGEFLAG(MovableOpsIsolated, movable_ops_isolated, PF_NO_TAIL); #else /* !CONFIG_MIGRATION */ TESTPAGEFLAG_FALSE(MovableOps, movable_ops); SETPAGEFLAG_NOOP(MovableOps, movable_ops); PAGEFLAG_FALSE(MovableOpsIsolated, movable_ops_isolated); #endif /* CONFIG_MIGRATION */ /** * page_has_movable_ops - test for a movable_ops page * @page: The page to test. * * Test whether this is a movable_ops page. Such pages will stay that * way until freed. * * Returns true if this is a movable_ops page, otherwise false. */ static inline bool page_has_movable_ops(const struct page *page) { return PageMovableOps(page) && (PageOffline(page) || PageZsmalloc(page)); } static __always_inline int PageAnonExclusive(const struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page), page); /* * HugeTLB stores this information on the head page; THP keeps it per * page */ if (PageHuge(page)) page = compound_head(page); return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } static __always_inline void SetPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } static __always_inline void ClearPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } static __always_inline void __ClearPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } #ifdef CONFIG_MMU #define __PG_MLOCKED (1UL << PG_mlocked) #else #define __PG_MLOCKED 0 #endif /* * Flags checked when a page is freed. Pages being freed should not have * these flags set. If they are, there is a problem. */ #define PAGE_FLAGS_CHECK_AT_FREE \ (1UL << PG_lru | 1UL << PG_locked | \ 1UL << PG_private | 1UL << PG_private_2 | \ 1UL << PG_writeback | 1UL << PG_reserved | \ 1UL << PG_active | \ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK) /* * Flags checked when a page is prepped for return by the page allocator. * Pages being prepped should not have these flags set. If they are set, * there has been a kernel bug or struct page corruption. * * __PG_HWPOISON is exceptional because it needs to be kept beyond page's * alloc-free cycle to prevent from reusing the page. */ #define PAGE_FLAGS_CHECK_AT_PREP \ ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK) /* * Flags stored in the second page of a compound page. They may overlap * the CHECK_AT_FREE flags above, so need to be cleared. */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ 1UL << PG_large_rmappable | 1UL << PG_partially_mapped) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) /** * folio_has_private - Determine if folio has private stuff * @folio: The folio to be checked * * Determine if a folio has private stuff, indicating that release routines * should be invoked upon it. */ static inline int folio_has_private(const struct folio *folio) { return !!(folio->flags.f & PAGE_FLAGS_PRIVATE); } #undef PF_ANY #undef PF_HEAD #undef PF_NO_TAIL #undef PF_NO_COMPOUND #undef PF_SECOND #endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */
14 228 171 221 239 11 149 75 49 136 130 73 59 96 64 130 8 4 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef ASM_KVM_CACHE_REGS_H #define ASM_KVM_CACHE_REGS_H #include <linux/kvm_host.h> #define KVM_POSSIBLE_CR0_GUEST_BITS (X86_CR0_TS | X86_CR0_WP) #define KVM_POSSIBLE_CR4_GUEST_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE \ | X86_CR4_CET) #define X86_CR0_PDPTR_BITS (X86_CR0_CD | X86_CR0_NW | X86_CR0_PG) #define X86_CR4_TLBFLUSH_BITS (X86_CR4_PGE | X86_CR4_PCIDE | X86_CR4_PAE | X86_CR4_SMEP) #define X86_CR4_PDPTR_BITS (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_SMEP) static_assert(!(KVM_POSSIBLE_CR0_GUEST_BITS & X86_CR0_PDPTR_BITS)); #define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\ { \ return vcpu->arch.regs[VCPU_REGS_##uname]; \ } \ static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \ unsigned long val) \ { \ vcpu->arch.regs[VCPU_REGS_##uname] = val; \ } BUILD_KVM_GPR_ACCESSORS(rax, RAX) BUILD_KVM_GPR_ACCESSORS(rbx, RBX) BUILD_KVM_GPR_ACCESSORS(rcx, RCX) BUILD_KVM_GPR_ACCESSORS(rdx, RDX) BUILD_KVM_GPR_ACCESSORS(rbp, RBP) BUILD_KVM_GPR_ACCESSORS(rsi, RSI) BUILD_KVM_GPR_ACCESSORS(rdi, RDI) #ifdef CONFIG_X86_64 BUILD_KVM_GPR_ACCESSORS(r8, R8) BUILD_KVM_GPR_ACCESSORS(r9, R9) BUILD_KVM_GPR_ACCESSORS(r10, R10) BUILD_KVM_GPR_ACCESSORS(r11, R11) BUILD_KVM_GPR_ACCESSORS(r12, R12) BUILD_KVM_GPR_ACCESSORS(r13, R13) BUILD_KVM_GPR_ACCESSORS(r14, R14) BUILD_KVM_GPR_ACCESSORS(r15, R15) #endif /* * Using the register cache from interrupt context is generally not allowed, as * caching a register and marking it available/dirty can't be done atomically, * i.e. accesses from interrupt context may clobber state or read stale data if * the vCPU task is in the process of updating the cache. The exception is if * KVM is handling a PMI IRQ/NMI VM-Exit, as that bound code sequence doesn't * touch the cache, it runs after the cache is reset (post VM-Exit), and PMIs * need to access several registers that are cacheable. */ #define kvm_assert_register_caching_allowed(vcpu) \ lockdep_assert_once(in_task() || kvm_arch_pmi_in_guest(vcpu)) /* * avail dirty * 0 0 register in VMCS/VMCB * 0 1 *INVALID* * 1 0 register in vcpu->arch * 1 1 register in vcpu->arch, needs to be stored back */ static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { kvm_assert_register_caching_allowed(vcpu); return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); } static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, enum kvm_reg reg) { kvm_assert_register_caching_allowed(vcpu); return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); } static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { kvm_assert_register_caching_allowed(vcpu); __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); } static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, enum kvm_reg reg) { kvm_assert_register_caching_allowed(vcpu); __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); } /* * kvm_register_test_and_mark_available() is a special snowflake that uses an * arch bitop directly to avoid the explicit instrumentation that comes with * the generic bitops. This allows code that cannot be instrumented (noinstr * functions), e.g. the low level VM-Enter/VM-Exit paths, to cache registers. */ static __always_inline bool kvm_register_test_and_mark_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { kvm_assert_register_caching_allowed(vcpu); return arch___test_and_set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); } /* * The "raw" register helpers are only for cases where the full 64 bits of a * register are read/written irrespective of current vCPU mode. In other words, * odds are good you shouldn't be using the raw variants. */ static inline unsigned long kvm_register_read_raw(struct kvm_vcpu *vcpu, int reg) { if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) return 0; if (!kvm_register_is_available(vcpu, reg)) kvm_x86_call(cache_reg)(vcpu, reg); return vcpu->arch.regs[reg]; } static inline void kvm_register_write_raw(struct kvm_vcpu *vcpu, int reg, unsigned long val) { if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) return; vcpu->arch.regs[reg] = val; kvm_register_mark_dirty(vcpu, reg); } static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu) { return kvm_register_read_raw(vcpu, VCPU_REGS_RIP); } static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val) { kvm_register_write_raw(vcpu, VCPU_REGS_RIP, val); } static inline unsigned long kvm_rsp_read(struct kvm_vcpu *vcpu) { return kvm_register_read_raw(vcpu, VCPU_REGS_RSP); } static inline void kvm_rsp_write(struct kvm_vcpu *vcpu, unsigned long val) { kvm_register_write_raw(vcpu, VCPU_REGS_RSP, val); } static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) { might_sleep(); /* on svm */ if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR)) kvm_x86_call(cache_reg)(vcpu, VCPU_EXREG_PDPTR); return vcpu->arch.walk_mmu->pdptrs[index]; } static inline void kvm_pdptr_write(struct kvm_vcpu *vcpu, int index, u64 value) { vcpu->arch.walk_mmu->pdptrs[index] = value; } static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) { ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; if ((tmask & vcpu->arch.cr0_guest_owned_bits) && !kvm_register_is_available(vcpu, VCPU_EXREG_CR0)) kvm_x86_call(cache_reg)(vcpu, VCPU_EXREG_CR0); return vcpu->arch.cr0 & mask; } static __always_inline bool kvm_is_cr0_bit_set(struct kvm_vcpu *vcpu, unsigned long cr0_bit) { BUILD_BUG_ON(!is_power_of_2(cr0_bit)); return !!kvm_read_cr0_bits(vcpu, cr0_bit); } static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu) { return kvm_read_cr0_bits(vcpu, ~0UL); } static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) { ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS; if ((tmask & vcpu->arch.cr4_guest_owned_bits) && !kvm_register_is_available(vcpu, VCPU_EXREG_CR4)) kvm_x86_call(cache_reg)(vcpu, VCPU_EXREG_CR4); return vcpu->arch.cr4 & mask; } static __always_inline bool kvm_is_cr4_bit_set(struct kvm_vcpu *vcpu, unsigned long cr4_bit) { BUILD_BUG_ON(!is_power_of_2(cr4_bit)); return !!kvm_read_cr4_bits(vcpu, cr4_bit); } static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu) { if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) kvm_x86_call(cache_reg)(vcpu, VCPU_EXREG_CR3); return vcpu->arch.cr3; } static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) { return kvm_read_cr4_bits(vcpu, ~0UL); } static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) { return (kvm_rax_read(vcpu) & -1u) | ((u64)(kvm_rdx_read(vcpu) & -1u) << 32); } static inline void enter_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags |= HF_GUEST_MASK; vcpu->stat.guest_mode = 1; } static inline void leave_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags &= ~HF_GUEST_MASK; if (vcpu->arch.load_eoi_exitmap_pending) { vcpu->arch.load_eoi_exitmap_pending = false; kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); } vcpu->stat.guest_mode = 0; } static inline bool is_guest_mode(struct kvm_vcpu *vcpu) { return vcpu->arch.hflags & HF_GUEST_MASK; } #endif
281 283 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 // SPDX-License-Identifier: GPL-2.0 #include <linux/cache.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/pid_namespace.h> #include "internal.h" /* * /proc/self: */ static const char *proc_self_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct pid_namespace *ns = proc_pid_ns(inode->i_sb); pid_t tgid = task_tgid_nr_ns(current, ns); char *name; if (!tgid) return ERR_PTR(-ENOENT); /* max length of unsigned int in decimal + NULL term */ name = kmalloc(10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC); if (unlikely(!name)) return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD); sprintf(name, "%u", tgid); set_delayed_call(done, kfree_link, name); return name; } static const struct inode_operations proc_self_inode_operations = { .get_link = proc_self_get_link, }; static unsigned self_inum __ro_after_init; int proc_setup_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); struct proc_fs_info *fs_info = proc_sb_info(s); struct dentry *self; int ret = -ENOMEM; inode_lock(root_inode); self = d_alloc_name(s->s_root, "self"); if (self) { struct inode *inode = new_inode(s); if (inode) { inode->i_ino = self_inum; simple_inode_init_ts(inode); inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_op = &proc_self_inode_operations; d_add(self, inode); ret = 0; } else { dput(self); } } inode_unlock(root_inode); if (ret) pr_err("proc_fill_super: can't allocate /proc/self\n"); else fs_info->proc_self = self; return ret; } void __init proc_self_init(void) { proc_alloc_inum(&self_inum); }
7 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_IF_MACVLAN_H #define _LINUX_IF_MACVLAN_H #include <linux/if_link.h> #include <linux/if_vlan.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <net/netlink.h> #include <linux/u64_stats_sync.h> struct macvlan_port; #define MACVLAN_MC_FILTER_BITS 8 #define MACVLAN_MC_FILTER_SZ (1 << MACVLAN_MC_FILTER_BITS) struct macvlan_dev { struct net_device *dev; struct list_head list; struct hlist_node hlist; struct macvlan_port *port; struct net_device *lowerdev; netdevice_tracker dev_tracker; void *accel_priv; struct vlan_pcpu_stats __percpu *pcpu_stats; DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ); netdev_features_t set_features; enum macvlan_mode mode; u16 flags; unsigned int macaddr_count; u32 bc_queue_len_req; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, unsigned int len, bool success, bool multicast) { if (likely(success)) { struct vlan_pcpu_stats *pcpu_stats; pcpu_stats = get_cpu_ptr(vlan->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); u64_stats_inc(&pcpu_stats->rx_packets); u64_stats_add(&pcpu_stats->rx_bytes, len); if (multicast) u64_stats_inc(&pcpu_stats->rx_multicast); u64_stats_update_end(&pcpu_stats->syncp); put_cpu_ptr(vlan->pcpu_stats); } else { this_cpu_inc(vlan->pcpu_stats->rx_errors); } } extern void macvlan_common_setup(struct net_device *dev); struct rtnl_newlink_params; extern int macvlan_common_newlink(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack); extern void macvlan_dellink(struct net_device *dev, struct list_head *head); extern int macvlan_link_register(struct rtnl_link_ops *ops); #if IS_ENABLED(CONFIG_MACVLAN) static inline struct net_device * macvlan_dev_real_dev(const struct net_device *dev) { struct macvlan_dev *macvlan = netdev_priv(dev); return macvlan->lowerdev; } #else static inline struct net_device * macvlan_dev_real_dev(const struct net_device *dev) { BUG(); return NULL; } #endif static inline void *macvlan_accel_priv(struct net_device *dev) { struct macvlan_dev *macvlan = netdev_priv(dev); return macvlan->accel_priv; } static inline bool macvlan_supports_dest_filter(struct net_device *dev) { struct macvlan_dev *macvlan = netdev_priv(dev); return macvlan->mode == MACVLAN_MODE_PRIVATE || macvlan->mode == MACVLAN_MODE_VEPA || macvlan->mode == MACVLAN_MODE_BRIDGE; } static inline int macvlan_release_l2fw_offload(struct net_device *dev) { struct macvlan_dev *macvlan = netdev_priv(dev); macvlan->accel_priv = NULL; return dev_uc_add(macvlan->lowerdev, dev->dev_addr); } #endif /* _LINUX_IF_MACVLAN_H */
12 2 14 14 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 // SPDX-License-Identifier: GPL-2.0-or-later /* * Driver for Zarlink DVB-T ZL10353 demodulator * * Copyright (C) 2006, 2007 Christopher Pascoe <c.pascoe@itee.uq.edu.au> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/delay.h> #include <linux/string.h> #include <linux/slab.h> #include <asm/div64.h> #include <media/dvb_frontend.h> #include "zl10353_priv.h" #include "zl10353.h" struct zl10353_state { struct i2c_adapter *i2c; struct dvb_frontend frontend; struct zl10353_config config; u32 bandwidth; u32 ucblocks; u32 frequency; }; static int debug; #define dprintk(args...) \ do { \ if (debug) printk(KERN_DEBUG "zl10353: " args); \ } while (0) static int debug_regs; static int zl10353_single_write(struct dvb_frontend *fe, u8 reg, u8 val) { struct zl10353_state *state = fe->demodulator_priv; u8 buf[2] = { reg, val }; struct i2c_msg msg = { .addr = state->config.demod_address, .flags = 0, .buf = buf, .len = 2 }; int err = i2c_transfer(state->i2c, &msg, 1); if (err != 1) { printk("zl10353: write to reg %x failed (err = %d)!\n", reg, err); return err; } return 0; } static int zl10353_write(struct dvb_frontend *fe, const u8 ibuf[], int ilen) { int err, i; for (i = 0; i < ilen - 1; i++) if ((err = zl10353_single_write(fe, ibuf[0] + i, ibuf[i + 1]))) return err; return 0; } static int zl10353_read_register(struct zl10353_state *state, u8 reg) { int ret; u8 b0[1] = { reg }; u8 b1[1] = { 0 }; struct i2c_msg msg[2] = { { .addr = state->config.demod_address, .flags = 0, .buf = b0, .len = 1 }, { .addr = state->config.demod_address, .flags = I2C_M_RD, .buf = b1, .len = 1 } }; ret = i2c_transfer(state->i2c, msg, 2); if (ret != 2) { printk("%s: readreg error (reg=%d, ret==%i)\n", __func__, reg, ret); return ret; } return b1[0]; } static void zl10353_dump_regs(struct dvb_frontend *fe) { struct zl10353_state *state = fe->demodulator_priv; int ret; u8 reg; /* Dump all registers. */ for (reg = 0; ; reg++) { if (reg % 16 == 0) { if (reg) printk(KERN_CONT "\n"); printk(KERN_DEBUG "%02x:", reg); } ret = zl10353_read_register(state, reg); if (ret >= 0) printk(KERN_CONT " %02x", (u8)ret); else printk(KERN_CONT " --"); if (reg == 0xff) break; } printk(KERN_CONT "\n"); } static void zl10353_calc_nominal_rate(struct dvb_frontend *fe, u32 bandwidth, u16 *nominal_rate) { struct zl10353_state *state = fe->demodulator_priv; u32 adc_clock = 450560; /* 45.056 MHz */ u64 value; u8 bw = bandwidth / 1000000; if (state->config.adc_clock) adc_clock = state->config.adc_clock; value = (u64)10 * (1 << 23) / 7 * 125; value = (bw * value) + adc_clock / 2; *nominal_rate = div_u64(value, adc_clock); dprintk("%s: bw %d, adc_clock %d => 0x%x\n", __func__, bw, adc_clock, *nominal_rate); } static void zl10353_calc_input_freq(struct dvb_frontend *fe, u16 *input_freq) { struct zl10353_state *state = fe->demodulator_priv; u32 adc_clock = 450560; /* 45.056 MHz */ int if2 = 361667; /* 36.1667 MHz */ int ife; u64 value; if (state->config.adc_clock) adc_clock = state->config.adc_clock; if (state->config.if2) if2 = state->config.if2; if (adc_clock >= if2 * 2) ife = if2; else { ife = adc_clock - (if2 % adc_clock); if (ife > adc_clock / 2) ife = adc_clock - ife; } value = div_u64((u64)65536 * ife + adc_clock / 2, adc_clock); *input_freq = -value; dprintk("%s: if2 %d, ife %d, adc_clock %d => %d / 0x%x\n", __func__, if2, ife, adc_clock, -(int)value, *input_freq); } static int zl10353_sleep(struct dvb_frontend *fe) { static u8 zl10353_softdown[] = { 0x50, 0x0C, 0x44 }; zl10353_write(fe, zl10353_softdown, sizeof(zl10353_softdown)); return 0; } static int zl10353_set_parameters(struct dvb_frontend *fe) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct zl10353_state *state = fe->demodulator_priv; u16 nominal_rate, input_freq; u8 pllbuf[6] = { 0x67 }, acq_ctl = 0; u16 tps = 0; state->frequency = c->frequency; zl10353_single_write(fe, RESET, 0x80); udelay(200); zl10353_single_write(fe, 0xEA, 0x01); udelay(200); zl10353_single_write(fe, 0xEA, 0x00); zl10353_single_write(fe, AGC_TARGET, 0x28); if (c->transmission_mode != TRANSMISSION_MODE_AUTO) acq_ctl |= (1 << 0); if (c->guard_interval != GUARD_INTERVAL_AUTO) acq_ctl |= (1 << 1); zl10353_single_write(fe, ACQ_CTL, acq_ctl); switch (c->bandwidth_hz) { case 6000000: /* These are extrapolated from the 7 and 8MHz values */ zl10353_single_write(fe, MCLK_RATIO, 0x97); zl10353_single_write(fe, 0x64, 0x34); zl10353_single_write(fe, 0xcc, 0xdd); break; case 7000000: zl10353_single_write(fe, MCLK_RATIO, 0x86); zl10353_single_write(fe, 0x64, 0x35); zl10353_single_write(fe, 0xcc, 0x73); break; default: c->bandwidth_hz = 8000000; fallthrough; case 8000000: zl10353_single_write(fe, MCLK_RATIO, 0x75); zl10353_single_write(fe, 0x64, 0x36); zl10353_single_write(fe, 0xcc, 0x73); } zl10353_calc_nominal_rate(fe, c->bandwidth_hz, &nominal_rate); zl10353_single_write(fe, TRL_NOMINAL_RATE_1, msb(nominal_rate)); zl10353_single_write(fe, TRL_NOMINAL_RATE_0, lsb(nominal_rate)); state->bandwidth = c->bandwidth_hz; zl10353_calc_input_freq(fe, &input_freq); zl10353_single_write(fe, INPUT_FREQ_1, msb(input_freq)); zl10353_single_write(fe, INPUT_FREQ_0, lsb(input_freq)); /* Hint at TPS settings */ switch (c->code_rate_HP) { case FEC_2_3: tps |= (1 << 7); break; case FEC_3_4: tps |= (2 << 7); break; case FEC_5_6: tps |= (3 << 7); break; case FEC_7_8: tps |= (4 << 7); break; case FEC_1_2: case FEC_AUTO: break; default: return -EINVAL; } switch (c->code_rate_LP) { case FEC_2_3: tps |= (1 << 4); break; case FEC_3_4: tps |= (2 << 4); break; case FEC_5_6: tps |= (3 << 4); break; case FEC_7_8: tps |= (4 << 4); break; case FEC_1_2: case FEC_AUTO: break; case FEC_NONE: if (c->hierarchy == HIERARCHY_AUTO || c->hierarchy == HIERARCHY_NONE) break; fallthrough; default: return -EINVAL; } switch (c->modulation) { case QPSK: break; case QAM_AUTO: case QAM_16: tps |= (1 << 13); break; case QAM_64: tps |= (2 << 13); break; default: return -EINVAL; } switch (c->transmission_mode) { case TRANSMISSION_MODE_2K: case TRANSMISSION_MODE_AUTO: break; case TRANSMISSION_MODE_8K: tps |= (1 << 0); break; default: return -EINVAL; } switch (c->guard_interval) { case GUARD_INTERVAL_1_32: case GUARD_INTERVAL_AUTO: break; case GUARD_INTERVAL_1_16: tps |= (1 << 2); break; case GUARD_INTERVAL_1_8: tps |= (2 << 2); break; case GUARD_INTERVAL_1_4: tps |= (3 << 2); break; default: return -EINVAL; } switch (c->hierarchy) { case HIERARCHY_AUTO: case HIERARCHY_NONE: break; case HIERARCHY_1: tps |= (1 << 10); break; case HIERARCHY_2: tps |= (2 << 10); break; case HIERARCHY_4: tps |= (3 << 10); break; default: return -EINVAL; } zl10353_single_write(fe, TPS_GIVEN_1, msb(tps)); zl10353_single_write(fe, TPS_GIVEN_0, lsb(tps)); if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 0); /* * If there is no tuner attached to the secondary I2C bus, we call * set_params to program a potential tuner attached somewhere else. * Otherwise, we update the PLL registers via calc_regs. */ if (state->config.no_tuner) { if (fe->ops.tuner_ops.set_params) { fe->ops.tuner_ops.set_params(fe); if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 0); } } else if (fe->ops.tuner_ops.calc_regs) { fe->ops.tuner_ops.calc_regs(fe, pllbuf + 1, 5); pllbuf[1] <<= 1; zl10353_write(fe, pllbuf, sizeof(pllbuf)); } zl10353_single_write(fe, 0x5F, 0x13); /* If no attached tuner or invalid PLL registers, just start the FSM. */ if (state->config.no_tuner || fe->ops.tuner_ops.calc_regs == NULL) zl10353_single_write(fe, FSM_GO, 0x01); else zl10353_single_write(fe, TUNER_GO, 0x01); return 0; } static int zl10353_get_parameters(struct dvb_frontend *fe, struct dtv_frontend_properties *c) { struct zl10353_state *state = fe->demodulator_priv; int s6, s9; u16 tps; static const u8 tps_fec_to_api[8] = { FEC_1_2, FEC_2_3, FEC_3_4, FEC_5_6, FEC_7_8, FEC_AUTO, FEC_AUTO, FEC_AUTO }; s6 = zl10353_read_register(state, STATUS_6); s9 = zl10353_read_register(state, STATUS_9); if (s6 < 0 || s9 < 0) return -EREMOTEIO; if ((s6 & (1 << 5)) == 0 || (s9 & (1 << 4)) == 0) return -EINVAL; /* no FE or TPS lock */ tps = zl10353_read_register(state, TPS_RECEIVED_1) << 8 | zl10353_read_register(state, TPS_RECEIVED_0); c->code_rate_HP = tps_fec_to_api[(tps >> 7) & 7]; c->code_rate_LP = tps_fec_to_api[(tps >> 4) & 7]; switch ((tps >> 13) & 3) { case 0: c->modulation = QPSK; break; case 1: c->modulation = QAM_16; break; case 2: c->modulation = QAM_64; break; default: c->modulation = QAM_AUTO; break; } c->transmission_mode = (tps & 0x01) ? TRANSMISSION_MODE_8K : TRANSMISSION_MODE_2K; switch ((tps >> 2) & 3) { case 0: c->guard_interval = GUARD_INTERVAL_1_32; break; case 1: c->guard_interval = GUARD_INTERVAL_1_16; break; case 2: c->guard_interval = GUARD_INTERVAL_1_8; break; case 3: c->guard_interval = GUARD_INTERVAL_1_4; break; default: c->guard_interval = GUARD_INTERVAL_AUTO; break; } switch ((tps >> 10) & 7) { case 0: c->hierarchy = HIERARCHY_NONE; break; case 1: c->hierarchy = HIERARCHY_1; break; case 2: c->hierarchy = HIERARCHY_2; break; case 3: c->hierarchy = HIERARCHY_4; break; default: c->hierarchy = HIERARCHY_AUTO; break; } c->frequency = state->frequency; c->bandwidth_hz = state->bandwidth; c->inversion = INVERSION_AUTO; return 0; } static int zl10353_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct zl10353_state *state = fe->demodulator_priv; int s6, s7, s8; if ((s6 = zl10353_read_register(state, STATUS_6)) < 0) return -EREMOTEIO; if ((s7 = zl10353_read_register(state, STATUS_7)) < 0) return -EREMOTEIO; if ((s8 = zl10353_read_register(state, STATUS_8)) < 0) return -EREMOTEIO; *status = 0; if (s6 & (1 << 2)) *status |= FE_HAS_CARRIER; if (s6 & (1 << 1)) *status |= FE_HAS_VITERBI; if (s6 & (1 << 5)) *status |= FE_HAS_LOCK; if (s7 & (1 << 4)) *status |= FE_HAS_SYNC; if (s8 & (1 << 6)) *status |= FE_HAS_SIGNAL; if ((*status & (FE_HAS_CARRIER | FE_HAS_VITERBI | FE_HAS_SYNC)) != (FE_HAS_CARRIER | FE_HAS_VITERBI | FE_HAS_SYNC)) *status &= ~FE_HAS_LOCK; return 0; } static int zl10353_read_ber(struct dvb_frontend *fe, u32 *ber) { struct zl10353_state *state = fe->demodulator_priv; *ber = zl10353_read_register(state, RS_ERR_CNT_2) << 16 | zl10353_read_register(state, RS_ERR_CNT_1) << 8 | zl10353_read_register(state, RS_ERR_CNT_0); return 0; } static int zl10353_read_signal_strength(struct dvb_frontend *fe, u16 *strength) { struct zl10353_state *state = fe->demodulator_priv; u16 signal = zl10353_read_register(state, AGC_GAIN_1) << 10 | zl10353_read_register(state, AGC_GAIN_0) << 2 | 3; *strength = ~signal; return 0; } static int zl10353_read_snr(struct dvb_frontend *fe, u16 *snr) { struct zl10353_state *state = fe->demodulator_priv; u8 _snr; if (debug_regs) zl10353_dump_regs(fe); _snr = zl10353_read_register(state, SNR); *snr = 10 * _snr / 8; return 0; } static int zl10353_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks) { struct zl10353_state *state = fe->demodulator_priv; u32 ubl = 0; ubl = zl10353_read_register(state, RS_UBC_1) << 8 | zl10353_read_register(state, RS_UBC_0); state->ucblocks += ubl; *ucblocks = state->ucblocks; return 0; } static int zl10353_get_tune_settings(struct dvb_frontend *fe, struct dvb_frontend_tune_settings *fe_tune_settings) { fe_tune_settings->min_delay_ms = 1000; fe_tune_settings->step_size = 0; fe_tune_settings->max_drift = 0; return 0; } static int zl10353_init(struct dvb_frontend *fe) { struct zl10353_state *state = fe->demodulator_priv; u8 zl10353_reset_attach[6] = { 0x50, 0x03, 0x64, 0x46, 0x15, 0x0F }; if (debug_regs) zl10353_dump_regs(fe); if (state->config.parallel_ts) zl10353_reset_attach[2] &= ~0x20; if (state->config.clock_ctl_1) zl10353_reset_attach[3] = state->config.clock_ctl_1; if (state->config.pll_0) zl10353_reset_attach[4] = state->config.pll_0; /* Do a "hard" reset if not already done */ if (zl10353_read_register(state, 0x50) != zl10353_reset_attach[1] || zl10353_read_register(state, 0x51) != zl10353_reset_attach[2]) { zl10353_write(fe, zl10353_reset_attach, sizeof(zl10353_reset_attach)); if (debug_regs) zl10353_dump_regs(fe); } return 0; } static int zl10353_i2c_gate_ctrl(struct dvb_frontend* fe, int enable) { struct zl10353_state *state = fe->demodulator_priv; u8 val = 0x0a; if (state->config.disable_i2c_gate_ctrl) { /* No tuner attached to the internal I2C bus */ /* If set enable I2C bridge, the main I2C bus stopped hardly */ return 0; } if (enable) val |= 0x10; return zl10353_single_write(fe, 0x62, val); } static void zl10353_release(struct dvb_frontend *fe) { struct zl10353_state *state = fe->demodulator_priv; kfree(state); } static const struct dvb_frontend_ops zl10353_ops; struct dvb_frontend *zl10353_attach(const struct zl10353_config *config, struct i2c_adapter *i2c) { struct zl10353_state *state = NULL; int id; /* allocate memory for the internal state */ state = kzalloc(sizeof(struct zl10353_state), GFP_KERNEL); if (state == NULL) goto error; /* setup the state */ state->i2c = i2c; memcpy(&state->config, config, sizeof(struct zl10353_config)); /* check if the demod is there */ id = zl10353_read_register(state, CHIP_ID); if ((id != ID_ZL10353) && (id != ID_CE6230) && (id != ID_CE6231)) goto error; /* create dvb_frontend */ memcpy(&state->frontend.ops, &zl10353_ops, sizeof(struct dvb_frontend_ops)); state->frontend.demodulator_priv = state; return &state->frontend; error: kfree(state); return NULL; } static const struct dvb_frontend_ops zl10353_ops = { .delsys = { SYS_DVBT }, .info = { .name = "Zarlink ZL10353 DVB-T", .frequency_min_hz = 174 * MHz, .frequency_max_hz = 862 * MHz, .frequency_stepsize_hz = 166667, .caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 | FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO | FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_QAM_64 | FE_CAN_QAM_AUTO | FE_CAN_TRANSMISSION_MODE_AUTO | FE_CAN_GUARD_INTERVAL_AUTO | FE_CAN_HIERARCHY_AUTO | FE_CAN_RECOVER | FE_CAN_MUTE_TS }, .release = zl10353_release, .init = zl10353_init, .sleep = zl10353_sleep, .i2c_gate_ctrl = zl10353_i2c_gate_ctrl, .write = zl10353_write, .set_frontend = zl10353_set_parameters, .get_frontend = zl10353_get_parameters, .get_tune_settings = zl10353_get_tune_settings, .read_status = zl10353_read_status, .read_ber = zl10353_read_ber, .read_signal_strength = zl10353_read_signal_strength, .read_snr = zl10353_read_snr, .read_ucblocks = zl10353_read_ucblocks, }; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off)."); module_param(debug_regs, int, 0644); MODULE_PARM_DESC(debug_regs, "Turn on/off frontend register dumps (default:off)."); MODULE_DESCRIPTION("Zarlink ZL10353 DVB-T demodulator driver"); MODULE_AUTHOR("Chris Pascoe"); MODULE_LICENSE("GPL"); EXPORT_SYMBOL_GPL(zl10353_attach);
20 21 52 52 52 52 52 52 51 96 88 23 96 52 95 13 12 11 11 11 11 10 9 6 4 52 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 // SPDX-License-Identifier: GPL-2.0-or-later /* * ip_vs_est.c: simple rate estimator for IPVS * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> * Network name space (netns) aware. * Global data moved to netns i.e struct netns_ipvs * Affected data: est_list and est_lock. * estimation_timer() runs with timer per netns. * get_stats()) do the per cpu summing. */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/types.h> #include <linux/interrupt.h> #include <linux/sysctl.h> #include <linux/list.h> #include <linux/rcupdate_wait.h> #include <net/ip_vs.h> /* This code is to estimate rate in a shorter interval (such as 8 seconds) for virtual services and real servers. For measure rate in a long interval, it is easy to implement a user level daemon which periodically reads those statistical counters and measure rate. We measure rate during the last 8 seconds every 2 seconds: avgrate = avgrate*(1-W) + rate*W where W = 2^(-2) NOTES. * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10. * Netlink users can see 64-bit values but sockopt users are restricted to 32-bit values for conns, packets, bps, cps and pps. * A lot of code is taken from net/core/gen_estimator.c KEY POINTS: - cpustats counters are updated per-cpu in SoftIRQ context with BH disabled - kthreads read the cpustats to update the estimators (svcs, dests, total) - the states of estimators can be read (get stats) or modified (zero stats) from processes KTHREADS: - estimators are added initially to est_temp_list and later kthread 0 distributes them to one or many kthreads for estimation - kthread contexts are created and attached to array - the kthread tasks are started when first service is added, before that the total stats are not estimated - when configuration (cpulist/nice) is changed, the tasks are restarted by work (est_reload_work) - kthread tasks are stopped while the cpulist is empty - the kthread context holds lists with estimators (chains) which are processed every 2 seconds - as estimators can be added dynamically and in bursts, we try to spread them to multiple chains which are estimated at different time - on start, kthread 0 enters calculation phase to determine the chain limits and the limit of estimators per kthread - est_add_ktid: ktid where to add new ests, can point to empty slot where we should add kt data */ static struct lock_class_key __ipvs_est_key; static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs); static void ip_vs_est_drain_temp_list(struct netns_ipvs *ipvs); static void ip_vs_chain_estimation(struct hlist_head *chain) { struct ip_vs_estimator *e; struct ip_vs_cpu_stats *c; struct ip_vs_stats *s; u64 rate; hlist_for_each_entry_rcu(e, chain, list) { u64 conns, inpkts, outpkts, inbytes, outbytes; u64 kconns = 0, kinpkts = 0, koutpkts = 0; u64 kinbytes = 0, koutbytes = 0; unsigned int start; int i; if (kthread_should_stop()) break; s = container_of(e, struct ip_vs_stats, est); for_each_possible_cpu(i) { c = per_cpu_ptr(s->cpustats, i); do { start = u64_stats_fetch_begin(&c->syncp); conns = u64_stats_read(&c->cnt.conns); inpkts = u64_stats_read(&c->cnt.inpkts); outpkts = u64_stats_read(&c->cnt.outpkts); inbytes = u64_stats_read(&c->cnt.inbytes); outbytes = u64_stats_read(&c->cnt.outbytes); } while (u64_stats_fetch_retry(&c->syncp, start)); kconns += conns; kinpkts += inpkts; koutpkts += outpkts; kinbytes += inbytes; koutbytes += outbytes; } spin_lock(&s->lock); s->kstats.conns = kconns; s->kstats.inpkts = kinpkts; s->kstats.outpkts = koutpkts; s->kstats.inbytes = kinbytes; s->kstats.outbytes = koutbytes; /* scaled by 2^10, but divided 2 seconds */ rate = (s->kstats.conns - e->last_conns) << 9; e->last_conns = s->kstats.conns; e->cps += ((s64)rate - (s64)e->cps) >> 2; rate = (s->kstats.inpkts - e->last_inpkts) << 9; e->last_inpkts = s->kstats.inpkts; e->inpps += ((s64)rate - (s64)e->inpps) >> 2; rate = (s->kstats.outpkts - e->last_outpkts) << 9; e->last_outpkts = s->kstats.outpkts; e->outpps += ((s64)rate - (s64)e->outpps) >> 2; /* scaled by 2^5, but divided 2 seconds */ rate = (s->kstats.inbytes - e->last_inbytes) << 4; e->last_inbytes = s->kstats.inbytes; e->inbps += ((s64)rate - (s64)e->inbps) >> 2; rate = (s->kstats.outbytes - e->last_outbytes) << 4; e->last_outbytes = s->kstats.outbytes; e->outbps += ((s64)rate - (s64)e->outbps) >> 2; spin_unlock(&s->lock); } } static void ip_vs_tick_estimation(struct ip_vs_est_kt_data *kd, int row) { struct ip_vs_est_tick_data *td; int cid; rcu_read_lock(); td = rcu_dereference(kd->ticks[row]); if (!td) goto out; for_each_set_bit(cid, td->present, IPVS_EST_TICK_CHAINS) { if (kthread_should_stop()) break; ip_vs_chain_estimation(&td->chains[cid]); cond_resched_rcu(); td = rcu_dereference(kd->ticks[row]); if (!td) break; } out: rcu_read_unlock(); } static int ip_vs_estimation_kthread(void *data) { struct ip_vs_est_kt_data *kd = data; struct netns_ipvs *ipvs = kd->ipvs; int row = kd->est_row; unsigned long now; int id = kd->id; long gap; if (id > 0) { if (!ipvs->est_chain_max) return 0; } else { if (!ipvs->est_chain_max) { ipvs->est_calc_phase = 1; /* commit est_calc_phase before reading est_genid */ smp_mb(); } /* kthread 0 will handle the calc phase */ if (ipvs->est_calc_phase) ip_vs_est_calc_phase(ipvs); } while (1) { if (!id && !hlist_empty(&ipvs->est_temp_list)) ip_vs_est_drain_temp_list(ipvs); set_current_state(TASK_IDLE); if (kthread_should_stop()) break; /* before estimation, check if we should sleep */ now = jiffies; gap = kd->est_timer - now; if (gap > 0) { if (gap > IPVS_EST_TICK) { kd->est_timer = now - IPVS_EST_TICK; gap = IPVS_EST_TICK; } schedule_timeout(gap); } else { __set_current_state(TASK_RUNNING); if (gap < -8 * IPVS_EST_TICK) kd->est_timer = now; } if (kd->tick_len[row]) ip_vs_tick_estimation(kd, row); row++; if (row >= IPVS_EST_NTICKS) row = 0; WRITE_ONCE(kd->est_row, row); kd->est_timer += IPVS_EST_TICK; } __set_current_state(TASK_RUNNING); return 0; } /* Schedule stop/start for kthread tasks */ void ip_vs_est_reload_start(struct netns_ipvs *ipvs) { /* Ignore reloads before first service is added */ if (!READ_ONCE(ipvs->enable)) return; ip_vs_est_stopped_recalc(ipvs); /* Bump the kthread configuration genid */ atomic_inc(&ipvs->est_genid); queue_delayed_work(system_long_wq, &ipvs->est_reload_work, 0); } /* Start kthread task with current configuration */ int ip_vs_est_kthread_start(struct netns_ipvs *ipvs, struct ip_vs_est_kt_data *kd) { unsigned long now; int ret = 0; long gap; lockdep_assert_held(&ipvs->est_mutex); if (kd->task) goto out; now = jiffies; gap = kd->est_timer - now; /* Sync est_timer if task is starting later */ if (abs(gap) > 4 * IPVS_EST_TICK) kd->est_timer = now; kd->task = kthread_create(ip_vs_estimation_kthread, kd, "ipvs-e:%d:%d", ipvs->gen, kd->id); if (IS_ERR(kd->task)) { ret = PTR_ERR(kd->task); kd->task = NULL; goto out; } set_user_nice(kd->task, sysctl_est_nice(ipvs)); if (sysctl_est_preferred_cpulist(ipvs)) kthread_affine_preferred(kd->task, sysctl_est_preferred_cpulist(ipvs)); pr_info("starting estimator thread %d...\n", kd->id); wake_up_process(kd->task); out: return ret; } void ip_vs_est_kthread_stop(struct ip_vs_est_kt_data *kd) { if (kd->task) { pr_info("stopping estimator thread %d...\n", kd->id); kthread_stop(kd->task); kd->task = NULL; } } /* Apply parameters to kthread */ static void ip_vs_est_set_params(struct netns_ipvs *ipvs, struct ip_vs_est_kt_data *kd) { kd->chain_max = ipvs->est_chain_max; /* We are using single chain on RCU preemption */ if (IPVS_EST_TICK_CHAINS == 1) kd->chain_max *= IPVS_EST_CHAIN_FACTOR; kd->tick_max = IPVS_EST_TICK_CHAINS * kd->chain_max; kd->est_max_count = IPVS_EST_NTICKS * kd->tick_max; } /* Create and start estimation kthread in a free or new array slot */ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) { struct ip_vs_est_kt_data *kd = NULL; int id = ipvs->est_kt_count; int ret = -ENOMEM; void *arr = NULL; int i; if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads && READ_ONCE(ipvs->enable) && ipvs->est_max_threads) return -EINVAL; mutex_lock(&ipvs->est_mutex); for (i = 0; i < id; i++) { if (!ipvs->est_kt_arr[i]) break; } if (i >= id) { arr = krealloc_array(ipvs->est_kt_arr, id + 1, sizeof(struct ip_vs_est_kt_data *), GFP_KERNEL); if (!arr) goto out; ipvs->est_kt_arr = arr; } else { id = i; } kd = kzalloc(sizeof(*kd), GFP_KERNEL); if (!kd) goto out; kd->ipvs = ipvs; bitmap_fill(kd->avail, IPVS_EST_NTICKS); kd->est_timer = jiffies; kd->id = id; ip_vs_est_set_params(ipvs, kd); /* Pre-allocate stats used in calc phase */ if (!id && !kd->calc_stats) { kd->calc_stats = ip_vs_stats_alloc(); if (!kd->calc_stats) goto out; } /* Start kthread tasks only when services are present */ if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) { ret = ip_vs_est_kthread_start(ipvs, kd); if (ret < 0) goto out; } if (arr) ipvs->est_kt_count++; ipvs->est_kt_arr[id] = kd; kd = NULL; /* Use most recent kthread for new ests */ ipvs->est_add_ktid = id; ret = 0; out: mutex_unlock(&ipvs->est_mutex); if (kd) { ip_vs_stats_free(kd->calc_stats); kfree(kd); } return ret; } /* Select ktid where to add new ests: available, unused or new slot */ static void ip_vs_est_update_ktid(struct netns_ipvs *ipvs) { int ktid, best = ipvs->est_kt_count; struct ip_vs_est_kt_data *kd; for (ktid = 0; ktid < ipvs->est_kt_count; ktid++) { kd = ipvs->est_kt_arr[ktid]; if (kd) { if (kd->est_count < kd->est_max_count) { best = ktid; break; } } else if (ktid < best) { best = ktid; } } ipvs->est_add_ktid = best; } /* Add estimator to current kthread (est_add_ktid) */ static int ip_vs_enqueue_estimator(struct netns_ipvs *ipvs, struct ip_vs_estimator *est) { struct ip_vs_est_kt_data *kd = NULL; struct ip_vs_est_tick_data *td; int ktid, row, crow, cid, ret; int delay = est->ktrow; BUILD_BUG_ON_MSG(IPVS_EST_TICK_CHAINS > 127, "Too many chains for ktcid"); if (ipvs->est_add_ktid < ipvs->est_kt_count) { kd = ipvs->est_kt_arr[ipvs->est_add_ktid]; if (kd) goto add_est; } ret = ip_vs_est_add_kthread(ipvs); if (ret < 0) goto out; kd = ipvs->est_kt_arr[ipvs->est_add_ktid]; add_est: ktid = kd->id; /* For small number of estimators prefer to use few ticks, * otherwise try to add into the last estimated row. * est_row and add_row point after the row we should use */ if (kd->est_count >= 2 * kd->tick_max || delay < IPVS_EST_NTICKS - 1) crow = READ_ONCE(kd->est_row); else crow = kd->add_row; crow += delay; if (crow >= IPVS_EST_NTICKS) crow -= IPVS_EST_NTICKS; /* Assume initial delay ? */ if (delay >= IPVS_EST_NTICKS - 1) { /* Preserve initial delay or decrease it if no space in tick */ row = crow; if (crow < IPVS_EST_NTICKS - 1) { crow++; row = find_last_bit(kd->avail, crow); } if (row >= crow) row = find_last_bit(kd->avail, IPVS_EST_NTICKS); } else { /* Preserve delay or increase it if no space in tick */ row = IPVS_EST_NTICKS; if (crow > 0) row = find_next_bit(kd->avail, IPVS_EST_NTICKS, crow); if (row >= IPVS_EST_NTICKS) row = find_first_bit(kd->avail, IPVS_EST_NTICKS); } td = rcu_dereference_protected(kd->ticks[row], 1); if (!td) { td = kzalloc(sizeof(*td), GFP_KERNEL); if (!td) { ret = -ENOMEM; goto out; } rcu_assign_pointer(kd->ticks[row], td); } cid = find_first_zero_bit(td->full, IPVS_EST_TICK_CHAINS); kd->est_count++; kd->tick_len[row]++; if (!td->chain_len[cid]) __set_bit(cid, td->present); td->chain_len[cid]++; est->ktid = ktid; est->ktrow = row; est->ktcid = cid; hlist_add_head_rcu(&est->list, &td->chains[cid]); if (td->chain_len[cid] >= kd->chain_max) { __set_bit(cid, td->full); if (kd->tick_len[row] >= kd->tick_max) __clear_bit(row, kd->avail); } /* Update est_add_ktid to point to first available/empty kt slot */ if (kd->est_count == kd->est_max_count) ip_vs_est_update_ktid(ipvs); ret = 0; out: return ret; } /* Start estimation for stats */ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) { struct ip_vs_estimator *est = &stats->est; int ret; if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable)) ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); est->ktid = -1; est->ktrow = IPVS_EST_NTICKS - 1; /* Initial delay */ /* We prefer this code to be short, kthread 0 will requeue the * estimator to available chain. If tasks are disabled, we * will not allocate much memory, just for kt 0. */ ret = 0; if (!ipvs->est_kt_count || !ipvs->est_kt_arr[0]) ret = ip_vs_est_add_kthread(ipvs); if (ret >= 0) hlist_add_head(&est->list, &ipvs->est_temp_list); else INIT_HLIST_NODE(&est->list); return ret; } static void ip_vs_est_kthread_destroy(struct ip_vs_est_kt_data *kd) { if (kd) { if (kd->task) { pr_info("stop unused estimator thread %d...\n", kd->id); kthread_stop(kd->task); } ip_vs_stats_free(kd->calc_stats); kfree(kd); } } /* Unlink estimator from chain */ void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) { struct ip_vs_estimator *est = &stats->est; struct ip_vs_est_tick_data *td; struct ip_vs_est_kt_data *kd; int ktid = est->ktid; int row = est->ktrow; int cid = est->ktcid; /* Failed to add to chain ? */ if (hlist_unhashed(&est->list)) return; /* On return, estimator can be freed, dequeue it now */ /* In est_temp_list ? */ if (ktid < 0) { hlist_del(&est->list); goto end_kt0; } hlist_del_rcu(&est->list); kd = ipvs->est_kt_arr[ktid]; td = rcu_dereference_protected(kd->ticks[row], 1); __clear_bit(cid, td->full); td->chain_len[cid]--; if (!td->chain_len[cid]) __clear_bit(cid, td->present); kd->tick_len[row]--; __set_bit(row, kd->avail); if (!kd->tick_len[row]) { RCU_INIT_POINTER(kd->ticks[row], NULL); kfree_rcu(td, rcu_head); } kd->est_count--; if (kd->est_count) { /* This kt slot can become available just now, prefer it */ if (ktid < ipvs->est_add_ktid) ipvs->est_add_ktid = ktid; return; } if (ktid > 0) { mutex_lock(&ipvs->est_mutex); ip_vs_est_kthread_destroy(kd); ipvs->est_kt_arr[ktid] = NULL; if (ktid == ipvs->est_kt_count - 1) { ipvs->est_kt_count--; while (ipvs->est_kt_count > 1 && !ipvs->est_kt_arr[ipvs->est_kt_count - 1]) ipvs->est_kt_count--; } mutex_unlock(&ipvs->est_mutex); /* This slot is now empty, prefer another available kt slot */ if (ktid == ipvs->est_add_ktid) ip_vs_est_update_ktid(ipvs); } end_kt0: /* kt 0 is freed after all other kthreads and chains are empty */ if (ipvs->est_kt_count == 1 && hlist_empty(&ipvs->est_temp_list)) { kd = ipvs->est_kt_arr[0]; if (!kd || !kd->est_count) { mutex_lock(&ipvs->est_mutex); if (kd) { ip_vs_est_kthread_destroy(kd); ipvs->est_kt_arr[0] = NULL; } ipvs->est_kt_count--; mutex_unlock(&ipvs->est_mutex); ipvs->est_add_ktid = 0; } } } /* Register all ests from est_temp_list to kthreads */ static void ip_vs_est_drain_temp_list(struct netns_ipvs *ipvs) { struct ip_vs_estimator *est; while (1) { int max = 16; mutex_lock(&__ip_vs_mutex); while (max-- > 0) { est = hlist_entry_safe(ipvs->est_temp_list.first, struct ip_vs_estimator, list); if (est) { if (kthread_should_stop()) goto unlock; hlist_del_init(&est->list); if (ip_vs_enqueue_estimator(ipvs, est) >= 0) continue; est->ktid = -1; hlist_add_head(&est->list, &ipvs->est_temp_list); /* Abort, some entries will not be estimated * until next attempt */ } goto unlock; } mutex_unlock(&__ip_vs_mutex); cond_resched(); } unlock: mutex_unlock(&__ip_vs_mutex); } /* Calculate limits for all kthreads */ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); struct ip_vs_est_kt_data *kd; struct hlist_head chain; struct ip_vs_stats *s; int cache_factor = 4; int i, loops, ntest; s32 min_est = 0; ktime_t t1, t2; int max = 8; int ret = 1; s64 diff; u64 val; INIT_HLIST_HEAD(&chain); mutex_lock(&__ip_vs_mutex); kd = ipvs->est_kt_arr[0]; mutex_unlock(&__ip_vs_mutex); s = kd ? kd->calc_stats : NULL; if (!s) goto out; hlist_add_head(&s->est.list, &chain); loops = 1; /* Get best result from many tests */ for (ntest = 0; ntest < 12; ntest++) { if (!(ntest & 3)) { /* Wait for cpufreq frequency transition */ wait_event_idle_timeout(wq, kthread_should_stop(), HZ / 50); if (!READ_ONCE(ipvs->enable) || kthread_should_stop()) goto stop; } local_bh_disable(); rcu_read_lock(); /* Put stats in cache */ ip_vs_chain_estimation(&chain); t1 = ktime_get(); for (i = loops * cache_factor; i > 0; i--) ip_vs_chain_estimation(&chain); t2 = ktime_get(); rcu_read_unlock(); local_bh_enable(); if (!READ_ONCE(ipvs->enable) || kthread_should_stop()) goto stop; cond_resched(); diff = ktime_to_ns(ktime_sub(t2, t1)); if (diff <= 1 * NSEC_PER_USEC) { /* Do more loops on low time resolution */ loops *= 2; continue; } if (diff >= NSEC_PER_SEC) continue; val = diff; do_div(val, loops); if (!min_est || val < min_est) { min_est = val; /* goal: 95usec per chain */ val = 95 * NSEC_PER_USEC; if (val >= min_est) { do_div(val, min_est); max = (int)val; } else { max = 1; } } } out: if (s) hlist_del_init(&s->est.list); *chain_max = max; return ret; stop: ret = 0; goto out; } /* Calculate the parameters and apply them in context of kt #0 * ECP: est_calc_phase * ECM: est_chain_max * ECP ECM Insert Chain enable Description * --------------------------------------------------------------------------- * 0 0 est_temp_list 0 create kt #0 context * 0 0 est_temp_list 0->1 service added, start kthread #0 task * 0->1 0 est_temp_list 1 kt task #0 started, enters calc phase * 1 0 est_temp_list 1 kt #0: determine est_chain_max, * stop tasks, move ests to est_temp_list * and free kd for kthreads 1..last * 1->0 0->N kt chains 1 ests can go to kthreads * 0 N kt chains 1 drain est_temp_list, create new kthread * contexts, start tasks, estimate */ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs) { int genid = atomic_read(&ipvs->est_genid); struct ip_vs_est_tick_data *td; struct ip_vs_est_kt_data *kd; struct ip_vs_estimator *est; struct ip_vs_stats *stats; int id, row, cid, delay; bool last, last_td; int chain_max; int step; if (!ip_vs_est_calc_limits(ipvs, &chain_max)) return; mutex_lock(&__ip_vs_mutex); /* Stop all other tasks, so that we can immediately move the * estimators to est_temp_list without RCU grace period */ mutex_lock(&ipvs->est_mutex); for (id = 1; id < ipvs->est_kt_count; id++) { /* netns clean up started, abort */ if (!READ_ONCE(ipvs->enable)) goto unlock2; kd = ipvs->est_kt_arr[id]; if (!kd) continue; ip_vs_est_kthread_stop(kd); } mutex_unlock(&ipvs->est_mutex); /* Move all estimators to est_temp_list but carefully, * all estimators and kthread data can be released while * we reschedule. Even for kthread 0. */ step = 0; /* Order entries in est_temp_list in ascending delay, so now * walk delay(desc), id(desc), cid(asc) */ delay = IPVS_EST_NTICKS; next_delay: delay--; if (delay < 0) goto end_dequeue; last_kt: /* Destroy contexts backwards */ id = ipvs->est_kt_count; next_kt: if (!READ_ONCE(ipvs->enable) || kthread_should_stop()) goto unlock; id--; if (id < 0) goto next_delay; kd = ipvs->est_kt_arr[id]; if (!kd) goto next_kt; /* kt 0 can exist with empty chains */ if (!id && kd->est_count <= 1) goto next_delay; row = kd->est_row + delay; if (row >= IPVS_EST_NTICKS) row -= IPVS_EST_NTICKS; td = rcu_dereference_protected(kd->ticks[row], 1); if (!td) goto next_kt; cid = 0; walk_chain: if (kthread_should_stop()) goto unlock; step++; if (!(step & 63)) { /* Give chance estimators to be added (to est_temp_list) * and deleted (releasing kthread contexts) */ mutex_unlock(&__ip_vs_mutex); cond_resched(); mutex_lock(&__ip_vs_mutex); /* Current kt released ? */ if (id >= ipvs->est_kt_count) goto last_kt; if (kd != ipvs->est_kt_arr[id]) goto next_kt; /* Current td released ? */ if (td != rcu_dereference_protected(kd->ticks[row], 1)) goto next_kt; /* No fatal changes on the current kd and td */ } est = hlist_entry_safe(td->chains[cid].first, struct ip_vs_estimator, list); if (!est) { cid++; if (cid >= IPVS_EST_TICK_CHAINS) goto next_kt; goto walk_chain; } /* We can cheat and increase est_count to protect kt 0 context * from release but we prefer to keep the last estimator */ last = kd->est_count <= 1; /* Do not free kt #0 data */ if (!id && last) goto next_delay; last_td = kd->tick_len[row] <= 1; stats = container_of(est, struct ip_vs_stats, est); ip_vs_stop_estimator(ipvs, stats); /* Tasks are stopped, move without RCU grace period */ est->ktid = -1; est->ktrow = row - kd->est_row; if (est->ktrow < 0) est->ktrow += IPVS_EST_NTICKS; hlist_add_head(&est->list, &ipvs->est_temp_list); /* kd freed ? */ if (last) goto next_kt; /* td freed ? */ if (last_td) goto next_kt; goto walk_chain; end_dequeue: /* All estimators removed while calculating ? */ if (!ipvs->est_kt_count) goto unlock; kd = ipvs->est_kt_arr[0]; if (!kd) goto unlock; kd->add_row = kd->est_row; ipvs->est_chain_max = chain_max; ip_vs_est_set_params(ipvs, kd); pr_info("using max %d ests per chain, %d per kthread\n", kd->chain_max, kd->est_max_count); /* Try to keep tot_stats in kt0, enqueue it early */ if (ipvs->tot_stats && !hlist_unhashed(&ipvs->tot_stats->s.est.list) && ipvs->tot_stats->s.est.ktid == -1) { hlist_del(&ipvs->tot_stats->s.est.list); hlist_add_head(&ipvs->tot_stats->s.est.list, &ipvs->est_temp_list); } mutex_lock(&ipvs->est_mutex); /* We completed the calc phase, new calc phase not requested */ if (genid == atomic_read(&ipvs->est_genid)) ipvs->est_calc_phase = 0; unlock2: mutex_unlock(&ipvs->est_mutex); unlock: mutex_unlock(&__ip_vs_mutex); } void ip_vs_zero_estimator(struct ip_vs_stats *stats) { struct ip_vs_estimator *est = &stats->est; struct ip_vs_kstats *k = &stats->kstats; /* reset counters, caller must hold the stats->lock lock */ est->last_inbytes = k->inbytes; est->last_outbytes = k->outbytes; est->last_conns = k->conns; est->last_inpkts = k->inpkts; est->last_outpkts = k->outpkts; est->cps = 0; est->inpps = 0; est->outpps = 0; est->inbps = 0; est->outbps = 0; } /* Get decoded rates */ void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats) { struct ip_vs_estimator *e = &stats->est; dst->cps = (e->cps + 0x1FF) >> 10; dst->inpps = (e->inpps + 0x1FF) >> 10; dst->outpps = (e->outpps + 0x1FF) >> 10; dst->inbps = (e->inbps + 0xF) >> 5; dst->outbps = (e->outbps + 0xF) >> 5; } int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs) { INIT_HLIST_HEAD(&ipvs->est_temp_list); ipvs->est_kt_arr = NULL; ipvs->est_max_threads = 0; ipvs->est_calc_phase = 0; ipvs->est_chain_max = 0; ipvs->est_kt_count = 0; ipvs->est_add_ktid = 0; atomic_set(&ipvs->est_genid, 0); atomic_set(&ipvs->est_genid_done, 0); __mutex_init(&ipvs->est_mutex, "ipvs->est_mutex", &__ipvs_est_key); return 0; } void __net_exit ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs) { int i; for (i = 0; i < ipvs->est_kt_count; i++) ip_vs_est_kthread_destroy(ipvs->est_kt_arr[i]); kfree(ipvs->est_kt_arr); mutex_destroy(&ipvs->est_mutex); }
17 17 19 15 153 129 13 7 13 9 16 1 23 9 6 3 17 3 15 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/binfmt_script.c * * Copyright (C) 1996 Martin von Löwis * original #!-checking implemented by tytso. */ #include <linux/module.h> #include <linux/string.h> #include <linux/stat.h> #include <linux/binfmts.h> #include <linux/init.h> #include <linux/file.h> #include <linux/err.h> #include <linux/fs.h> static inline bool spacetab(char c) { return c == ' ' || c == '\t'; } static inline const char *next_non_spacetab(const char *first, const char *last) { for (; first <= last; first++) if (!spacetab(*first)) return first; return NULL; } static inline const char *next_terminator(const char *first, const char *last) { for (; first <= last; first++) if (spacetab(*first) || !*first) return first; return NULL; } static int load_script(struct linux_binprm *bprm) { const char *i_name, *i_sep, *i_arg, *i_end, *buf_end; struct file *file; int retval; /* Not ours to exec if we don't start with "#!". */ if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) return -ENOEXEC; /* * This section handles parsing the #! line into separate * interpreter path and argument strings. We must be careful * because bprm->buf is not yet guaranteed to be NUL-terminated * (though the buffer will have trailing NUL padding when the * file size was smaller than the buffer size). * * We do not want to exec a truncated interpreter path, so either * we find a newline (which indicates nothing is truncated), or * we find a space/tab/NUL after the interpreter path (which * itself may be preceded by spaces/tabs). Truncating the * arguments is fine: the interpreter can re-read the script to * parse them on its own. */ buf_end = bprm->buf + sizeof(bprm->buf) - 1; i_end = strnchr(bprm->buf, sizeof(bprm->buf), '\n'); if (!i_end) { i_end = next_non_spacetab(bprm->buf + 2, buf_end); if (!i_end) return -ENOEXEC; /* Entire buf is spaces/tabs */ /* * If there is no later space/tab/NUL we must assume the * interpreter path is truncated. */ if (!next_terminator(i_end, buf_end)) return -ENOEXEC; i_end = buf_end; } /* Trim any trailing spaces/tabs from i_end */ while (spacetab(i_end[-1])) i_end--; /* Skip over leading spaces/tabs */ i_name = next_non_spacetab(bprm->buf+2, i_end); if (!i_name || (i_name == i_end)) return -ENOEXEC; /* No interpreter name found */ /* Is there an optional argument? */ i_arg = NULL; i_sep = next_terminator(i_name, i_end); if (i_sep && (*i_sep != '\0')) i_arg = next_non_spacetab(i_sep, i_end); /* * If the script filename will be inaccessible after exec, typically * because it is a "/dev/fd/<fd>/.." path against an O_CLOEXEC fd, give * up now (on the assumption that the interpreter will want to load * this file). */ if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) return -ENOENT; /* * OK, we've parsed out the interpreter name and * (optional) argument. * Splice in (1) the interpreter's name for argv[0] * (2) (optional) argument to interpreter * (3) filename of shell script (replace argv[0]) * * This is done in reverse order, because of how the * user environment and arguments are stored. */ retval = remove_arg_zero(bprm); if (retval) return retval; retval = copy_string_kernel(bprm->interp, bprm); if (retval < 0) return retval; bprm->argc++; *((char *)i_end) = '\0'; if (i_arg) { *((char *)i_sep) = '\0'; retval = copy_string_kernel(i_arg, bprm); if (retval < 0) return retval; bprm->argc++; } retval = copy_string_kernel(i_name, bprm); if (retval) return retval; bprm->argc++; retval = bprm_change_interp(i_name, bprm); if (retval < 0) return retval; /* * OK, now restart the process with the interpreter's dentry. */ file = open_exec(i_name); if (IS_ERR(file)) return PTR_ERR(file); bprm->interpreter = file; return 0; } static struct linux_binfmt script_format = { .module = THIS_MODULE, .load_binary = load_script, }; static int __init init_script_binfmt(void) { register_binfmt(&script_format); return 0; } static void __exit exit_script_binfmt(void) { unregister_binfmt(&script_format); } core_initcall(init_script_binfmt); module_exit(exit_script_binfmt); MODULE_DESCRIPTION("Kernel support for scripts starting with #!"); MODULE_LICENSE("GPL");
1 24 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> * Copyright (c) 2002 David S. Miller (davem@redhat.com) * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au> * * Portions derived from Cryptoapi, by Alexander Kjeldaas <astor@fast.no> * and Nettle, by Niels Möller. */ #ifndef _CRYPTO_INTERNAL_CIPHER_H #define _CRYPTO_INTERNAL_CIPHER_H #include <crypto/algapi.h> struct crypto_cipher { struct crypto_tfm base; }; /** * DOC: Single Block Cipher API * * The single block cipher API is used with the ciphers of type * CRYPTO_ALG_TYPE_CIPHER (listed as type "cipher" in /proc/crypto). * * Using the single block cipher API calls, operations with the basic cipher * primitive can be implemented. These cipher primitives exclude any block * chaining operations including IV handling. * * The purpose of this single block cipher API is to support the implementation * of templates or other concepts that only need to perform the cipher operation * on one block at a time. Templates invoke the underlying cipher primitive * block-wise and process either the input or the output data of these cipher * operations. */ static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm) { return (struct crypto_cipher *)tfm; } /** * crypto_alloc_cipher() - allocate single block cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * single block cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for a single block cipher. The returned struct * crypto_cipher is the cipher handle that is required for any subsequent API * invocation for that single block cipher. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ static inline struct crypto_cipher *crypto_alloc_cipher(const char *alg_name, u32 type, u32 mask) { type &= ~CRYPTO_ALG_TYPE_MASK; type |= CRYPTO_ALG_TYPE_CIPHER; mask |= CRYPTO_ALG_TYPE_MASK; return __crypto_cipher_cast(crypto_alloc_base(alg_name, type, mask)); } static inline struct crypto_tfm *crypto_cipher_tfm(struct crypto_cipher *tfm) { return &tfm->base; } /** * crypto_free_cipher() - zeroize and free the single block cipher handle * @tfm: cipher handle to be freed */ static inline void crypto_free_cipher(struct crypto_cipher *tfm) { crypto_free_tfm(crypto_cipher_tfm(tfm)); } /** * crypto_has_cipher() - Search for the availability of a single block cipher * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * single block cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Return: true when the single block cipher is known to the kernel crypto API; * false otherwise */ static inline int crypto_has_cipher(const char *alg_name, u32 type, u32 mask) { type &= ~CRYPTO_ALG_TYPE_MASK; type |= CRYPTO_ALG_TYPE_CIPHER; mask |= CRYPTO_ALG_TYPE_MASK; return crypto_has_alg(alg_name, type, mask); } /** * crypto_cipher_blocksize() - obtain block size for cipher * @tfm: cipher handle * * The block size for the single block cipher referenced with the cipher handle * tfm is returned. The caller may use that information to allocate appropriate * memory for the data returned by the encryption or decryption operation * * Return: block size of cipher */ static inline unsigned int crypto_cipher_blocksize(struct crypto_cipher *tfm) { return crypto_tfm_alg_blocksize(crypto_cipher_tfm(tfm)); } static inline unsigned int crypto_cipher_alignmask(struct crypto_cipher *tfm) { return crypto_tfm_alg_alignmask(crypto_cipher_tfm(tfm)); } static inline u32 crypto_cipher_get_flags(struct crypto_cipher *tfm) { return crypto_tfm_get_flags(crypto_cipher_tfm(tfm)); } static inline void crypto_cipher_set_flags(struct crypto_cipher *tfm, u32 flags) { crypto_tfm_set_flags(crypto_cipher_tfm(tfm), flags); } static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm, u32 flags) { crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags); } /** * crypto_cipher_setkey() - set key for cipher * @tfm: cipher handle * @key: buffer holding the key * @keylen: length of the key in bytes * * The caller provided key is set for the single block cipher referenced by the * cipher handle. * * Note, the key length determines the cipher type. Many block ciphers implement * different cipher modes depending on the key size, such as AES-128 vs AES-192 * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 * is performed. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_cipher_setkey(struct crypto_cipher *tfm, const u8 *key, unsigned int keylen); /** * crypto_cipher_encrypt_one() - encrypt one block of plaintext * @tfm: cipher handle * @dst: points to the buffer that will be filled with the ciphertext * @src: buffer holding the plaintext to be encrypted * * Invoke the encryption operation of one block. The caller must ensure that * the plaintext and ciphertext buffers are at least one block in size. */ void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, u8 *dst, const u8 *src); /** * crypto_cipher_decrypt_one() - decrypt one block of ciphertext * @tfm: cipher handle * @dst: points to the buffer that will be filled with the plaintext * @src: buffer holding the ciphertext to be decrypted * * Invoke the decryption operation of one block. The caller must ensure that * the plaintext and ciphertext buffers are at least one block in size. */ void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, u8 *dst, const u8 *src); struct crypto_cipher *crypto_clone_cipher(struct crypto_cipher *cipher); struct crypto_cipher_spawn { struct crypto_spawn base; }; static inline int crypto_grab_cipher(struct crypto_cipher_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask) { type &= ~CRYPTO_ALG_TYPE_MASK; type |= CRYPTO_ALG_TYPE_CIPHER; mask |= CRYPTO_ALG_TYPE_MASK; return crypto_grab_spawn(&spawn->base, inst, name, type, mask); } static inline void crypto_drop_cipher(struct crypto_cipher_spawn *spawn) { crypto_drop_spawn(&spawn->base); } static inline struct crypto_alg *crypto_spawn_cipher_alg( struct crypto_cipher_spawn *spawn) { return spawn->base.alg; } static inline struct crypto_cipher *crypto_spawn_cipher( struct crypto_cipher_spawn *spawn) { u32 type = CRYPTO_ALG_TYPE_CIPHER; u32 mask = CRYPTO_ALG_TYPE_MASK; return __crypto_cipher_cast(crypto_spawn_tfm(&spawn->base, type, mask)); } static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm) { return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher; } #endif
3 6 1 6 5 3 2 2 6 1 6 2 6 4 3 3 3 3 7 7 7 1 6 1 2 3 3 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 // SPDX-License-Identifier: GPL-2.0-or-later /* * SQ930x subdriver * * Copyright (C) 2010 Jean-François Moine <http://moinejf.free.fr> * Copyright (C) 2006 -2008 Gerard Klaver <gerard at gkall dot hobby dot nl> * Copyright (C) 2007 Sam Revitch <samr7@cs.washington.edu> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "sq930x" #include "gspca.h" MODULE_AUTHOR("Jean-Francois Moine <http://moinejf.free.fr>\n" "Gerard Klaver <gerard at gkall dot hobby dot nl\n" "Sam Revitch <samr7@cs.washington.edu>"); MODULE_DESCRIPTION("GSPCA/SQ930x USB Camera Driver"); MODULE_LICENSE("GPL"); /* Structure to hold all of our device specific stuff */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ struct { /* exposure/gain control cluster */ struct v4l2_ctrl *exposure; struct v4l2_ctrl *gain; }; u8 do_ctrl; u8 gpio[2]; u8 sensor; u8 type; #define Generic 0 #define Creative_live_motion 1 }; enum sensors { SENSOR_ICX098BQ, SENSOR_LZ24BP, SENSOR_MI0360, SENSOR_MT9V111, /* = MI360SOC */ SENSOR_OV7660, SENSOR_OV9630, }; static struct v4l2_pix_format vga_mode[] = { {320, 240, V4L2_PIX_FMT_SRGGB8, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, {640, 480, V4L2_PIX_FMT_SRGGB8, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, }; /* sq930x registers */ #define SQ930_CTRL_UCBUS_IO 0x0001 #define SQ930_CTRL_I2C_IO 0x0002 #define SQ930_CTRL_GPIO 0x0005 #define SQ930_CTRL_CAP_START 0x0010 #define SQ930_CTRL_CAP_STOP 0x0011 #define SQ930_CTRL_SET_EXPOSURE 0x001d #define SQ930_CTRL_RESET 0x001e #define SQ930_CTRL_GET_DEV_INFO 0x001f /* gpio 1 (8..15) */ #define SQ930_GPIO_DFL_I2C_SDA 0x0001 #define SQ930_GPIO_DFL_I2C_SCL 0x0002 #define SQ930_GPIO_RSTBAR 0x0004 #define SQ930_GPIO_EXTRA1 0x0040 #define SQ930_GPIO_EXTRA2 0x0080 /* gpio 3 (24..31) */ #define SQ930_GPIO_POWER 0x0200 #define SQ930_GPIO_DFL_LED 0x1000 struct ucbus_write_cmd { u16 bw_addr; u8 bw_data; }; struct i2c_write_cmd { u8 reg; u16 val; }; static const struct ucbus_write_cmd icx098bq_start_0[] = { {0x0354, 0x00}, {0x03fa, 0x00}, {0xf800, 0x02}, {0xf801, 0xce}, {0xf802, 0xc1}, {0xf804, 0x00}, {0xf808, 0x00}, {0xf809, 0x0e}, {0xf80a, 0x01}, {0xf80b, 0xee}, {0xf807, 0x60}, {0xf80c, 0x02}, {0xf80d, 0xf0}, {0xf80e, 0x03}, {0xf80f, 0x0a}, {0xf81c, 0x02}, {0xf81d, 0xf0}, {0xf81e, 0x03}, {0xf81f, 0x0a}, {0xf83a, 0x00}, {0xf83b, 0x10}, {0xf83c, 0x00}, {0xf83d, 0x4e}, {0xf810, 0x04}, {0xf811, 0x00}, {0xf812, 0x02}, {0xf813, 0x10}, {0xf803, 0x00}, {0xf814, 0x01}, {0xf815, 0x18}, {0xf816, 0x00}, {0xf817, 0x48}, {0xf818, 0x00}, {0xf819, 0x25}, {0xf81a, 0x00}, {0xf81b, 0x3c}, {0xf82f, 0x03}, {0xf820, 0xff}, {0xf821, 0x0d}, {0xf822, 0xff}, {0xf823, 0x07}, {0xf824, 0xff}, {0xf825, 0x03}, {0xf826, 0xff}, {0xf827, 0x06}, {0xf828, 0xff}, {0xf829, 0x03}, {0xf82a, 0xff}, {0xf82b, 0x0c}, {0xf82c, 0xfd}, {0xf82d, 0x01}, {0xf82e, 0x00}, {0xf830, 0x00}, {0xf831, 0x47}, {0xf832, 0x00}, {0xf833, 0x00}, {0xf850, 0x00}, {0xf851, 0x00}, {0xf852, 0x00}, {0xf853, 0x24}, {0xf854, 0x00}, {0xf855, 0x18}, {0xf856, 0x00}, {0xf857, 0x3c}, {0xf858, 0x00}, {0xf859, 0x0c}, {0xf85a, 0x00}, {0xf85b, 0x30}, {0xf85c, 0x00}, {0xf85d, 0x0c}, {0xf85e, 0x00}, {0xf85f, 0x30}, {0xf860, 0x00}, {0xf861, 0x48}, {0xf862, 0x01}, {0xf863, 0xdc}, {0xf864, 0xff}, {0xf865, 0x98}, {0xf866, 0xff}, {0xf867, 0xc0}, {0xf868, 0xff}, {0xf869, 0x70}, {0xf86c, 0xff}, {0xf86d, 0x00}, {0xf86a, 0xff}, {0xf86b, 0x48}, {0xf86e, 0xff}, {0xf86f, 0x00}, {0xf870, 0x01}, {0xf871, 0xdb}, {0xf872, 0x01}, {0xf873, 0xfa}, {0xf874, 0x01}, {0xf875, 0xdb}, {0xf876, 0x01}, {0xf877, 0xfa}, {0xf878, 0x0f}, {0xf879, 0x0f}, {0xf87a, 0xff}, {0xf87b, 0xff}, {0xf800, 0x03} }; static const struct ucbus_write_cmd icx098bq_start_1[] = { {0xf5f0, 0x00}, {0xf5f1, 0xcd}, {0xf5f2, 0x80}, {0xf5f3, 0x80}, {0xf5f4, 0xc0}, {0xf5f0, 0x49}, {0xf5f1, 0xcd}, {0xf5f2, 0x80}, {0xf5f3, 0x80}, {0xf5f4, 0xc0}, {0xf5fa, 0x00}, {0xf5f6, 0x00}, {0xf5f7, 0x00}, {0xf5f8, 0x00}, {0xf5f9, 0x00} }; static const struct ucbus_write_cmd icx098bq_start_2[] = { {0xf800, 0x02}, {0xf807, 0xff}, {0xf805, 0x82}, {0xf806, 0x00}, {0xf807, 0x7f}, {0xf800, 0x03}, {0xf800, 0x02}, {0xf807, 0xff}, {0xf805, 0x40}, {0xf806, 0x00}, {0xf807, 0x7f}, {0xf800, 0x03}, {0xf800, 0x02}, {0xf807, 0xff}, {0xf805, 0xcf}, {0xf806, 0xd0}, {0xf807, 0x7f}, {0xf800, 0x03}, {0xf800, 0x02}, {0xf807, 0xff}, {0xf805, 0x00}, {0xf806, 0x00}, {0xf807, 0x7f}, {0xf800, 0x03} }; static const struct ucbus_write_cmd lz24bp_start_0[] = { {0x0354, 0x00}, {0x03fa, 0x00}, {0xf800, 0x02}, {0xf801, 0xbe}, {0xf802, 0xc6}, {0xf804, 0x00}, {0xf808, 0x00}, {0xf809, 0x06}, {0xf80a, 0x01}, {0xf80b, 0xfe}, {0xf807, 0x84}, {0xf80c, 0x02}, {0xf80d, 0xf7}, {0xf80e, 0x03}, {0xf80f, 0x0b}, {0xf81c, 0x00}, {0xf81d, 0x49}, {0xf81e, 0x03}, {0xf81f, 0x0b}, {0xf83a, 0x00}, {0xf83b, 0x01}, {0xf83c, 0x00}, {0xf83d, 0x6b}, {0xf810, 0x03}, {0xf811, 0x10}, {0xf812, 0x02}, {0xf813, 0x6f}, {0xf803, 0x00}, {0xf814, 0x00}, {0xf815, 0x44}, {0xf816, 0x00}, {0xf817, 0x48}, {0xf818, 0x00}, {0xf819, 0x25}, {0xf81a, 0x00}, {0xf81b, 0x3c}, {0xf82f, 0x03}, {0xf820, 0xff}, {0xf821, 0x0d}, {0xf822, 0xff}, {0xf823, 0x07}, {0xf824, 0xfd}, {0xf825, 0x07}, {0xf826, 0xf0}, {0xf827, 0x0c}, {0xf828, 0xff}, {0xf829, 0x03}, {0xf82a, 0xff}, {0xf82b, 0x0c}, {0xf82c, 0xfc}, {0xf82d, 0x01}, {0xf82e, 0x00}, {0xf830, 0x00}, {0xf831, 0x47}, {0xf832, 0x00}, {0xf833, 0x00}, {0xf850, 0x00}, {0xf851, 0x00}, {0xf852, 0x00}, {0xf853, 0x24}, {0xf854, 0x00}, {0xf855, 0x0c}, {0xf856, 0x00}, {0xf857, 0x30}, {0xf858, 0x00}, {0xf859, 0x18}, {0xf85a, 0x00}, {0xf85b, 0x3c}, {0xf85c, 0x00}, {0xf85d, 0x18}, {0xf85e, 0x00}, {0xf85f, 0x3c}, {0xf860, 0xff}, {0xf861, 0x37}, {0xf862, 0xff}, {0xf863, 0x1d}, {0xf864, 0xff}, {0xf865, 0x98}, {0xf866, 0xff}, {0xf867, 0xc0}, {0xf868, 0x00}, {0xf869, 0x37}, {0xf86c, 0x02}, {0xf86d, 0x1d}, {0xf86a, 0x00}, {0xf86b, 0x37}, {0xf86e, 0x02}, {0xf86f, 0x1d}, {0xf870, 0x01}, {0xf871, 0xc6}, {0xf872, 0x02}, {0xf873, 0x04}, {0xf874, 0x01}, {0xf875, 0xc6}, {0xf876, 0x02}, {0xf877, 0x04}, {0xf878, 0x0f}, {0xf879, 0x0f}, {0xf87a, 0xff}, {0xf87b, 0xff}, {0xf800, 0x03} }; static const struct ucbus_write_cmd lz24bp_start_1_gen[] = { {0xf5f0, 0x00}, {0xf5f1, 0xff}, {0xf5f2, 0x80}, {0xf5f3, 0x80}, {0xf5f4, 0xb3}, {0xf5f0, 0x40}, {0xf5f1, 0xff}, {0xf5f2, 0x80}, {0xf5f3, 0x80}, {0xf5f4, 0xb3}, {0xf5fa, 0x00}, {0xf5f6, 0x00}, {0xf5f7, 0x00}, {0xf5f8, 0x00}, {0xf5f9, 0x00} }; static const struct ucbus_write_cmd lz24bp_start_1_clm[] = { {0xf5f0, 0x00}, {0xf5f1, 0xff}, {0xf5f2, 0x88}, {0xf5f3, 0x88}, {0xf5f4, 0xc0}, {0xf5f0, 0x40}, {0xf5f1, 0xff}, {0xf5f2, 0x88}, {0xf5f3, 0x88}, {0xf5f4, 0xc0}, {0xf5fa, 0x00}, {0xf5f6, 0x00}, {0xf5f7, 0x00}, {0xf5f8, 0x00}, {0xf5f9, 0x00} }; static const struct ucbus_write_cmd lz24bp_start_2[] = { {0xf800, 0x02}, {0xf807, 0xff}, {0xf805, 0x80}, {0xf806, 0x00}, {0xf807, 0x7f}, {0xf800, 0x03}, {0xf800, 0x02}, {0xf807, 0xff}, {0xf805, 0x4e}, {0xf806, 0x00}, {0xf807, 0x7f}, {0xf800, 0x03}, {0xf800, 0x02}, {0xf807, 0xff}, {0xf805, 0xc0}, {0xf806, 0x48}, {0xf807, 0x7f}, {0xf800, 0x03}, {0xf800, 0x02}, {0xf807, 0xff}, {0xf805, 0x00}, {0xf806, 0x00}, {0xf807, 0x7f}, {0xf800, 0x03} }; static const struct ucbus_write_cmd mi0360_start_0[] = { {0x0354, 0x00}, {0x03fa, 0x00}, {0xf332, 0xcc}, {0xf333, 0xcc}, {0xf334, 0xcc}, {0xf335, 0xcc}, {0xf33f, 0x00} }; static const struct i2c_write_cmd mi0360_init_23[] = { {0x30, 0x0040}, /* reserved - def 0x0005 */ {0x31, 0x0000}, /* reserved - def 0x002a */ {0x34, 0x0100}, /* reserved - def 0x0100 */ {0x3d, 0x068f}, /* reserved - def 0x068f */ }; static const struct i2c_write_cmd mi0360_init_24[] = { {0x03, 0x01e5}, /* window height */ {0x04, 0x0285}, /* window width */ }; static const struct i2c_write_cmd mi0360_init_25[] = { {0x35, 0x0020}, /* global gain */ {0x2b, 0x0020}, /* green1 gain */ {0x2c, 0x002a}, /* blue gain */ {0x2d, 0x0028}, /* red gain */ {0x2e, 0x0020}, /* green2 gain */ }; static const struct ucbus_write_cmd mi0360_start_1[] = { {0xf5f0, 0x11}, {0xf5f1, 0x99}, {0xf5f2, 0x80}, {0xf5f3, 0x80}, {0xf5f4, 0xa6}, {0xf5f0, 0x51}, {0xf5f1, 0x99}, {0xf5f2, 0x80}, {0xf5f3, 0x80}, {0xf5f4, 0xa6}, {0xf5fa, 0x00}, {0xf5f6, 0x00}, {0xf5f7, 0x00}, {0xf5f8, 0x00}, {0xf5f9, 0x00} }; static const struct i2c_write_cmd mi0360_start_2[] = { {0x62, 0x041d}, /* reserved - def 0x0418 */ }; static const struct i2c_write_cmd mi0360_start_3[] = { {0x05, 0x007b}, /* horiz blanking */ }; static const struct i2c_write_cmd mi0360_start_4[] = { {0x05, 0x03f5}, /* horiz blanking */ }; static const struct i2c_write_cmd mt9v111_init_0[] = { {0x01, 0x0001}, /* select IFP/SOC registers */ {0x06, 0x300c}, /* operating mode control */ {0x08, 0xcc00}, /* output format control (RGB) */ {0x01, 0x0004}, /* select sensor core registers */ }; static const struct i2c_write_cmd mt9v111_init_1[] = { {0x03, 0x01e5}, /* window height */ {0x04, 0x0285}, /* window width */ }; static const struct i2c_write_cmd mt9v111_init_2[] = { {0x30, 0x7800}, {0x31, 0x0000}, {0x07, 0x3002}, /* output control */ {0x35, 0x0020}, /* global gain */ {0x2b, 0x0020}, /* green1 gain */ {0x2c, 0x0020}, /* blue gain */ {0x2d, 0x0020}, /* red gain */ {0x2e, 0x0020}, /* green2 gain */ }; static const struct ucbus_write_cmd mt9v111_start_1[] = { {0xf5f0, 0x11}, {0xf5f1, 0x96}, {0xf5f2, 0x80}, {0xf5f3, 0x80}, {0xf5f4, 0xaa}, {0xf5f0, 0x51}, {0xf5f1, 0x96}, {0xf5f2, 0x80}, {0xf5f3, 0x80}, {0xf5f4, 0xaa}, {0xf5fa, 0x00}, {0xf5f6, 0x0a}, {0xf5f7, 0x0a}, {0xf5f8, 0x0a}, {0xf5f9, 0x0a} }; static const struct i2c_write_cmd mt9v111_init_3[] = { {0x62, 0x0405}, }; static const struct i2c_write_cmd mt9v111_init_4[] = { /* {0x05, 0x00ce}, */ {0x05, 0x005d}, /* horizontal blanking */ }; static const struct ucbus_write_cmd ov7660_start_0[] = { {0x0354, 0x00}, {0x03fa, 0x00}, {0xf332, 0x00}, {0xf333, 0xc0}, {0xf334, 0x39}, {0xf335, 0xe7}, {0xf33f, 0x03} }; static const struct ucbus_write_cmd ov9630_start_0[] = { {0x0354, 0x00}, {0x03fa, 0x00}, {0xf332, 0x00}, {0xf333, 0x00}, {0xf334, 0x3e}, {0xf335, 0xf8}, {0xf33f, 0x03} }; /* start parameters indexed by [sensor][mode] */ static const struct cap_s { u8 cc_sizeid; u8 cc_bytes[32]; } capconfig[4][2] = { [SENSOR_ICX098BQ] = { {2, /* Bayer 320x240 */ {0x05, 0x1f, 0x20, 0x0e, 0x00, 0x9f, 0x02, 0xee, 0x01, 0x01, 0x00, 0x08, 0x18, 0x12, 0x78, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }, {4, /* Bayer 640x480 */ {0x01, 0x1f, 0x20, 0x0e, 0x00, 0x9f, 0x02, 0xee, 0x01, 0x02, 0x00, 0x08, 0x18, 0x12, 0x78, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }, }, [SENSOR_LZ24BP] = { {2, /* Bayer 320x240 */ {0x05, 0x22, 0x20, 0x0e, 0x00, 0xa2, 0x02, 0xee, 0x01, 0x01, 0x00, 0x08, 0x18, 0x12, 0x78, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }, {4, /* Bayer 640x480 */ {0x01, 0x22, 0x20, 0x0e, 0x00, 0xa2, 0x02, 0xee, 0x01, 0x02, 0x00, 0x08, 0x18, 0x12, 0x78, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }, }, [SENSOR_MI0360] = { {2, /* Bayer 320x240 */ {0x05, 0x02, 0x20, 0x01, 0x20, 0x82, 0x02, 0xe1, 0x01, 0x01, 0x00, 0x08, 0x18, 0x12, 0x78, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }, {4, /* Bayer 640x480 */ {0x01, 0x02, 0x20, 0x01, 0x20, 0x82, 0x02, 0xe1, 0x01, 0x02, 0x00, 0x08, 0x18, 0x12, 0x78, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }, }, [SENSOR_MT9V111] = { {2, /* Bayer 320x240 */ {0x05, 0x02, 0x20, 0x01, 0x20, 0x82, 0x02, 0xe1, 0x01, 0x01, 0x00, 0x08, 0x18, 0x12, 0x78, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }, {4, /* Bayer 640x480 */ {0x01, 0x02, 0x20, 0x01, 0x20, 0x82, 0x02, 0xe1, 0x01, 0x02, 0x00, 0x08, 0x18, 0x12, 0x78, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }, }, }; struct sensor_s { const char *name; u8 i2c_addr; u8 i2c_dum; u8 gpio[5]; u8 cmd_len; const struct ucbus_write_cmd *cmd; }; static const struct sensor_s sensor_tb[] = { [SENSOR_ICX098BQ] = { "icx098bp", 0x00, 0x00, {0, SQ930_GPIO_DFL_I2C_SDA | SQ930_GPIO_DFL_I2C_SCL, SQ930_GPIO_DFL_I2C_SDA, 0, SQ930_GPIO_RSTBAR }, 8, icx098bq_start_0 }, [SENSOR_LZ24BP] = { "lz24bp", 0x00, 0x00, {0, SQ930_GPIO_DFL_I2C_SDA | SQ930_GPIO_DFL_I2C_SCL, SQ930_GPIO_DFL_I2C_SDA, 0, SQ930_GPIO_RSTBAR }, 8, lz24bp_start_0 }, [SENSOR_MI0360] = { "mi0360", 0x5d, 0x80, {SQ930_GPIO_RSTBAR, SQ930_GPIO_DFL_I2C_SDA | SQ930_GPIO_DFL_I2C_SCL, SQ930_GPIO_DFL_I2C_SDA, 0, 0 }, 7, mi0360_start_0 }, [SENSOR_MT9V111] = { "mt9v111", 0x5c, 0x7f, {SQ930_GPIO_RSTBAR, SQ930_GPIO_DFL_I2C_SDA | SQ930_GPIO_DFL_I2C_SCL, SQ930_GPIO_DFL_I2C_SDA, 0, 0 }, 7, mi0360_start_0 }, [SENSOR_OV7660] = { "ov7660", 0x21, 0x00, {0, SQ930_GPIO_DFL_I2C_SDA | SQ930_GPIO_DFL_I2C_SCL, SQ930_GPIO_DFL_I2C_SDA, 0, SQ930_GPIO_RSTBAR }, 7, ov7660_start_0 }, [SENSOR_OV9630] = { "ov9630", 0x30, 0x00, {0, SQ930_GPIO_DFL_I2C_SDA | SQ930_GPIO_DFL_I2C_SCL, SQ930_GPIO_DFL_I2C_SDA, 0, SQ930_GPIO_RSTBAR }, 7, ov9630_start_0 }, }; static void reg_r(struct gspca_dev *gspca_dev, u16 value, int len) { int ret; if (gspca_dev->usb_err < 0) return; ret = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), 0x0c, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, 0, gspca_dev->usb_buf, len, 500); if (ret < 0) { pr_err("reg_r %04x failed %d\n", value, ret); gspca_dev->usb_err = ret; /* * Make sure the buffer is zeroed to avoid uninitialized * values. */ memset(gspca_dev->usb_buf, 0, USB_BUF_SZ); } } static void reg_w(struct gspca_dev *gspca_dev, u16 value, u16 index) { int ret; if (gspca_dev->usb_err < 0) return; gspca_dbg(gspca_dev, D_USBO, "reg_w v: %04x i: %04x\n", value, index); ret = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), 0x0c, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); msleep(30); if (ret < 0) { pr_err("reg_w %04x %04x failed %d\n", value, index, ret); gspca_dev->usb_err = ret; } } static void reg_wb(struct gspca_dev *gspca_dev, u16 value, u16 index, const u8 *data, int len) { int ret; if (gspca_dev->usb_err < 0) return; gspca_dbg(gspca_dev, D_USBO, "reg_wb v: %04x i: %04x %02x...%02x\n", value, index, *data, data[len - 1]); memcpy(gspca_dev->usb_buf, data, len); ret = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), 0x0c, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, gspca_dev->usb_buf, len, 1000); msleep(30); if (ret < 0) { pr_err("reg_wb %04x %04x failed %d\n", value, index, ret); gspca_dev->usb_err = ret; } } static void i2c_write(struct sd *sd, const struct i2c_write_cmd *cmd, int ncmds) { struct gspca_dev *gspca_dev = &sd->gspca_dev; const struct sensor_s *sensor; u16 val, idx; u8 *buf; int ret; if (gspca_dev->usb_err < 0) return; sensor = &sensor_tb[sd->sensor]; val = (sensor->i2c_addr << 8) | SQ930_CTRL_I2C_IO; idx = (cmd->val & 0xff00) | cmd->reg; buf = gspca_dev->usb_buf; *buf++ = sensor->i2c_dum; *buf++ = cmd->val; while (--ncmds > 0) { cmd++; *buf++ = cmd->reg; *buf++ = cmd->val >> 8; *buf++ = sensor->i2c_dum; *buf++ = cmd->val; } gspca_dbg(gspca_dev, D_USBO, "i2c_w v: %04x i: %04x %02x...%02x\n", val, idx, gspca_dev->usb_buf[0], buf[-1]); ret = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), 0x0c, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, val, idx, gspca_dev->usb_buf, buf - gspca_dev->usb_buf, 500); if (ret < 0) { pr_err("i2c_write failed %d\n", ret); gspca_dev->usb_err = ret; } } static void ucbus_write(struct gspca_dev *gspca_dev, const struct ucbus_write_cmd *cmd, int ncmds, int batchsize) { u8 *buf; u16 val, idx; int len, ret; if (gspca_dev->usb_err < 0) return; if ((batchsize - 1) * 3 > USB_BUF_SZ) { gspca_err(gspca_dev, "Bug: usb_buf overflow\n"); gspca_dev->usb_err = -ENOMEM; return; } for (;;) { len = ncmds; if (len > batchsize) len = batchsize; ncmds -= len; val = (cmd->bw_addr << 8) | SQ930_CTRL_UCBUS_IO; idx = (cmd->bw_data << 8) | (cmd->bw_addr >> 8); buf = gspca_dev->usb_buf; while (--len > 0) { cmd++; *buf++ = cmd->bw_addr; *buf++ = cmd->bw_addr >> 8; *buf++ = cmd->bw_data; } if (buf != gspca_dev->usb_buf) gspca_dbg(gspca_dev, D_USBO, "ucbus v: %04x i: %04x %02x...%02x\n", val, idx, gspca_dev->usb_buf[0], buf[-1]); else gspca_dbg(gspca_dev, D_USBO, "ucbus v: %04x i: %04x\n", val, idx); ret = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), 0x0c, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, val, idx, gspca_dev->usb_buf, buf - gspca_dev->usb_buf, 500); if (ret < 0) { pr_err("ucbus_write failed %d\n", ret); gspca_dev->usb_err = ret; return; } msleep(30); if (ncmds <= 0) break; cmd++; } } static void gpio_set(struct sd *sd, u16 val, u16 mask) { struct gspca_dev *gspca_dev = &sd->gspca_dev; if (mask & 0x00ff) { sd->gpio[0] &= ~mask; sd->gpio[0] |= val; reg_w(gspca_dev, 0x0100 | SQ930_CTRL_GPIO, ~sd->gpio[0] << 8); } mask >>= 8; val >>= 8; if (mask) { sd->gpio[1] &= ~mask; sd->gpio[1] |= val; reg_w(gspca_dev, 0x0300 | SQ930_CTRL_GPIO, ~sd->gpio[1] << 8); } } static void gpio_init(struct sd *sd, const u8 *gpio) { gpio_set(sd, *gpio++, 0x000f); gpio_set(sd, *gpio++, 0x000f); gpio_set(sd, *gpio++, 0x000f); gpio_set(sd, *gpio++, 0x000f); gpio_set(sd, *gpio, 0x000f); } static void bridge_init(struct sd *sd) { static const struct ucbus_write_cmd clkfreq_cmd = { 0xf031, 0 /* SQ930_CLKFREQ_60MHZ */ }; ucbus_write(&sd->gspca_dev, &clkfreq_cmd, 1, 1); gpio_set(sd, SQ930_GPIO_POWER, 0xff00); } static void cmos_probe(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int i; const struct sensor_s *sensor; static const u8 probe_order[] = { /* SENSOR_LZ24BP, (tested as ccd) */ SENSOR_OV9630, SENSOR_MI0360, SENSOR_OV7660, SENSOR_MT9V111, }; for (i = 0; i < ARRAY_SIZE(probe_order); i++) { sensor = &sensor_tb[probe_order[i]]; ucbus_write(&sd->gspca_dev, sensor->cmd, sensor->cmd_len, 8); gpio_init(sd, sensor->gpio); msleep(100); reg_r(gspca_dev, (sensor->i2c_addr << 8) | 0x001c, 1); msleep(100); if (gspca_dev->usb_buf[0] != 0) break; } if (i >= ARRAY_SIZE(probe_order)) { pr_err("Unknown sensor\n"); gspca_dev->usb_err = -EINVAL; return; } sd->sensor = probe_order[i]; switch (sd->sensor) { case SENSOR_OV7660: case SENSOR_OV9630: pr_err("Sensor %s not yet treated\n", sensor_tb[sd->sensor].name); gspca_dev->usb_err = -EINVAL; break; } } static void mt9v111_init(struct gspca_dev *gspca_dev) { int i, nwait; static const u8 cmd_001b[] = { 0x00, 0x3b, 0xf6, 0x01, 0x03, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const u8 cmd_011b[][7] = { {0x10, 0x01, 0x66, 0x08, 0x00, 0x00, 0x00}, {0x01, 0x00, 0x1a, 0x04, 0x00, 0x00, 0x00}, {0x20, 0x00, 0x10, 0x04, 0x00, 0x00, 0x00}, {0x02, 0x01, 0xae, 0x01, 0x00, 0x00, 0x00}, }; reg_wb(gspca_dev, 0x001b, 0x0000, cmd_001b, sizeof cmd_001b); for (i = 0; i < ARRAY_SIZE(cmd_011b); i++) { reg_wb(gspca_dev, 0x001b, 0x0000, cmd_011b[i], ARRAY_SIZE(cmd_011b[0])); msleep(400); nwait = 20; for (;;) { reg_r(gspca_dev, 0x031b, 1); if (gspca_dev->usb_buf[0] == 0 || gspca_dev->usb_err != 0) break; if (--nwait < 0) { gspca_dbg(gspca_dev, D_PROBE, "mt9v111_init timeout\n"); gspca_dev->usb_err = -ETIME; return; } msleep(50); } } } static void global_init(struct sd *sd, int first_time) { switch (sd->sensor) { case SENSOR_ICX098BQ: if (first_time) ucbus_write(&sd->gspca_dev, icx098bq_start_0, 8, 8); gpio_init(sd, sensor_tb[sd->sensor].gpio); break; case SENSOR_LZ24BP: if (sd->type != Creative_live_motion) gpio_set(sd, SQ930_GPIO_EXTRA1, 0x00ff); else gpio_set(sd, 0, 0x00ff); msleep(50); if (first_time) ucbus_write(&sd->gspca_dev, lz24bp_start_0, 8, 8); gpio_init(sd, sensor_tb[sd->sensor].gpio); break; case SENSOR_MI0360: if (first_time) ucbus_write(&sd->gspca_dev, mi0360_start_0, ARRAY_SIZE(mi0360_start_0), 8); gpio_init(sd, sensor_tb[sd->sensor].gpio); gpio_set(sd, SQ930_GPIO_EXTRA2, SQ930_GPIO_EXTRA2); break; default: /* case SENSOR_MT9V111: */ if (first_time) mt9v111_init(&sd->gspca_dev); else gpio_init(sd, sensor_tb[sd->sensor].gpio); break; } } static void lz24bp_ppl(struct sd *sd, u16 ppl) { struct ucbus_write_cmd cmds[2] = { {0xf810, ppl >> 8}, {0xf811, ppl} }; ucbus_write(&sd->gspca_dev, cmds, ARRAY_SIZE(cmds), 2); } static void setexposure(struct gspca_dev *gspca_dev, s32 expo, s32 gain) { struct sd *sd = (struct sd *) gspca_dev; int i, integclks, intstartclk, frameclks, min_frclk; const struct sensor_s *sensor; u16 cmd; u8 buf[15]; integclks = expo; i = 0; cmd = SQ930_CTRL_SET_EXPOSURE; switch (sd->sensor) { case SENSOR_ICX098BQ: /* ccd */ case SENSOR_LZ24BP: min_frclk = sd->sensor == SENSOR_ICX098BQ ? 0x210 : 0x26f; if (integclks >= min_frclk) { intstartclk = 0; frameclks = integclks; } else { intstartclk = min_frclk - integclks; frameclks = min_frclk; } buf[i++] = intstartclk >> 8; buf[i++] = intstartclk; buf[i++] = frameclks >> 8; buf[i++] = frameclks; buf[i++] = gain; break; default: /* cmos */ /* case SENSOR_MI0360: */ /* case SENSOR_MT9V111: */ cmd |= 0x0100; sensor = &sensor_tb[sd->sensor]; buf[i++] = sensor->i2c_addr; /* i2c_slave_addr */ buf[i++] = 0x08; /* 2 * ni2c */ buf[i++] = 0x09; /* reg = shutter width */ buf[i++] = integclks >> 8; /* val H */ buf[i++] = sensor->i2c_dum; buf[i++] = integclks; /* val L */ buf[i++] = 0x35; /* reg = global gain */ buf[i++] = 0x00; /* val H */ buf[i++] = sensor->i2c_dum; buf[i++] = 0x80 + gain / 2; /* val L */ buf[i++] = 0x00; buf[i++] = 0x00; buf[i++] = 0x00; buf[i++] = 0x00; buf[i++] = 0x83; break; } reg_wb(gspca_dev, cmd, 0, buf, i); } /* This function is called at probe time just before sd_init */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam = &gspca_dev->cam; sd->sensor = id->driver_info >> 8; sd->type = id->driver_info; cam->cam_mode = vga_mode; cam->nmodes = ARRAY_SIZE(vga_mode); cam->bulk = 1; return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; sd->gpio[0] = sd->gpio[1] = 0xff; /* force gpio rewrite */ /*fixme: is this needed for icx098bp and mi0360? if (sd->sensor != SENSOR_LZ24BP) reg_w(gspca_dev, SQ930_CTRL_RESET, 0x0000); */ reg_r(gspca_dev, SQ930_CTRL_GET_DEV_INFO, 8); if (gspca_dev->usb_err < 0) return gspca_dev->usb_err; /* it returns: * 03 00 12 93 0b f6 c9 00 live! ultra * 03 00 07 93 0b f6 ca 00 live! ultra for notebook * 03 00 12 93 0b fe c8 00 Trust WB-3500T * 02 00 06 93 0b fe c8 00 Joy-IT 318S * 03 00 12 93 0b f6 cf 00 icam tracer - sensor icx098bq * 02 00 12 93 0b fe cf 00 ProQ Motion Webcam * * byte * 0: 02 = usb 1.0 (12Mbit) / 03 = usb2.0 (480Mbit) * 1: 00 * 2: 06 / 07 / 12 = mode webcam? firmware?? * 3: 93 chip = 930b (930b or 930c) * 4: 0b * 5: f6 = cdd (icx098bq, lz24bp) / fe or de = cmos (i2c) (other sensors) * 6: c8 / c9 / ca / cf = mode webcam?, sensor? webcam? * 7: 00 */ gspca_dbg(gspca_dev, D_PROBE, "info: %*ph\n", 8, gspca_dev->usb_buf); bridge_init(sd); if (sd->sensor == SENSOR_MI0360) { /* no sensor probe for icam tracer */ if (gspca_dev->usb_buf[5] == 0xf6) /* if ccd */ sd->sensor = SENSOR_ICX098BQ; else cmos_probe(gspca_dev); } if (gspca_dev->usb_err >= 0) { gspca_dbg(gspca_dev, D_PROBE, "Sensor %s\n", sensor_tb[sd->sensor].name); global_init(sd, 1); } return gspca_dev->usb_err; } /* send the start/stop commands to the webcam */ static void send_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; const struct cap_s *cap; int mode; mode = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; cap = &capconfig[sd->sensor][mode]; reg_wb(gspca_dev, 0x0900 | SQ930_CTRL_CAP_START, 0x0a00 | cap->cc_sizeid, cap->cc_bytes, 32); } static void send_stop(struct gspca_dev *gspca_dev) { reg_w(gspca_dev, SQ930_CTRL_CAP_STOP, 0); } /* function called at start time before URB creation */ static int sd_isoc_init(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; gspca_dev->cam.bulk_nurbs = 1; /* there must be one URB only */ sd->do_ctrl = 0; gspca_dev->cam.bulk_size = gspca_dev->pixfmt.width * gspca_dev->pixfmt.height + 8; return 0; } /* start the capture */ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int mode; bridge_init(sd); global_init(sd, 0); msleep(100); switch (sd->sensor) { case SENSOR_ICX098BQ: ucbus_write(gspca_dev, icx098bq_start_0, ARRAY_SIZE(icx098bq_start_0), 8); ucbus_write(gspca_dev, icx098bq_start_1, ARRAY_SIZE(icx098bq_start_1), 5); ucbus_write(gspca_dev, icx098bq_start_2, ARRAY_SIZE(icx098bq_start_2), 6); msleep(50); /* 1st start */ send_start(gspca_dev); gpio_set(sd, SQ930_GPIO_EXTRA2 | SQ930_GPIO_RSTBAR, 0x00ff); msleep(70); reg_w(gspca_dev, SQ930_CTRL_CAP_STOP, 0x0000); gpio_set(sd, 0x7f, 0x00ff); /* 2nd start */ send_start(gspca_dev); gpio_set(sd, SQ930_GPIO_EXTRA2 | SQ930_GPIO_RSTBAR, 0x00ff); goto out; case SENSOR_LZ24BP: ucbus_write(gspca_dev, lz24bp_start_0, ARRAY_SIZE(lz24bp_start_0), 8); if (sd->type != Creative_live_motion) ucbus_write(gspca_dev, lz24bp_start_1_gen, ARRAY_SIZE(lz24bp_start_1_gen), 5); else ucbus_write(gspca_dev, lz24bp_start_1_clm, ARRAY_SIZE(lz24bp_start_1_clm), 5); ucbus_write(gspca_dev, lz24bp_start_2, ARRAY_SIZE(lz24bp_start_2), 6); mode = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; lz24bp_ppl(sd, mode == 1 ? 0x0564 : 0x0310); msleep(10); break; case SENSOR_MI0360: ucbus_write(gspca_dev, mi0360_start_0, ARRAY_SIZE(mi0360_start_0), 8); i2c_write(sd, mi0360_init_23, ARRAY_SIZE(mi0360_init_23)); i2c_write(sd, mi0360_init_24, ARRAY_SIZE(mi0360_init_24)); i2c_write(sd, mi0360_init_25, ARRAY_SIZE(mi0360_init_25)); ucbus_write(gspca_dev, mi0360_start_1, ARRAY_SIZE(mi0360_start_1), 5); i2c_write(sd, mi0360_start_2, ARRAY_SIZE(mi0360_start_2)); i2c_write(sd, mi0360_start_3, ARRAY_SIZE(mi0360_start_3)); /* 1st start */ send_start(gspca_dev); msleep(60); send_stop(gspca_dev); i2c_write(sd, mi0360_start_4, ARRAY_SIZE(mi0360_start_4)); break; default: /* case SENSOR_MT9V111: */ ucbus_write(gspca_dev, mi0360_start_0, ARRAY_SIZE(mi0360_start_0), 8); i2c_write(sd, mt9v111_init_0, ARRAY_SIZE(mt9v111_init_0)); i2c_write(sd, mt9v111_init_1, ARRAY_SIZE(mt9v111_init_1)); i2c_write(sd, mt9v111_init_2, ARRAY_SIZE(mt9v111_init_2)); ucbus_write(gspca_dev, mt9v111_start_1, ARRAY_SIZE(mt9v111_start_1), 5); i2c_write(sd, mt9v111_init_3, ARRAY_SIZE(mt9v111_init_3)); i2c_write(sd, mt9v111_init_4, ARRAY_SIZE(mt9v111_init_4)); break; } send_start(gspca_dev); out: msleep(1000); if (sd->sensor == SENSOR_MT9V111) gpio_set(sd, SQ930_GPIO_DFL_LED, SQ930_GPIO_DFL_LED); sd->do_ctrl = 1; /* set the exposure */ return gspca_dev->usb_err; } static void sd_stopN(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (sd->sensor == SENSOR_MT9V111) gpio_set(sd, 0, SQ930_GPIO_DFL_LED); send_stop(gspca_dev); } /* function called when the application gets a new frame */ /* It sets the exposure if required and restart the bulk transfer. */ static void sd_dq_callback(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int ret; if (!sd->do_ctrl || gspca_dev->cam.bulk_nurbs != 0) return; sd->do_ctrl = 0; setexposure(gspca_dev, v4l2_ctrl_g_ctrl(sd->exposure), v4l2_ctrl_g_ctrl(sd->gain)); gspca_dev->cam.bulk_nurbs = 1; ret = usb_submit_urb(gspca_dev->urb[0], GFP_KERNEL); if (ret < 0) pr_err("sd_dq_callback() err %d\n", ret); /* wait a little time, otherwise the webcam crashes */ msleep(100); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; if (sd->do_ctrl) gspca_dev->cam.bulk_nurbs = 0; gspca_frame_add(gspca_dev, FIRST_PACKET, NULL, 0); gspca_frame_add(gspca_dev, INTER_PACKET, data, len - 8); gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *) gspca_dev; gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_EXPOSURE: setexposure(gspca_dev, ctrl->val, sd->gain->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; struct sd *sd = (struct sd *) gspca_dev; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 2); sd->exposure = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_EXPOSURE, 1, 0xfff, 1, 0x356); sd->gain = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_GAIN, 1, 255, 1, 0x8d); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } v4l2_ctrl_cluster(2, &sd->exposure); return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .isoc_init = sd_isoc_init, .start = sd_start, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, .dq_callback = sd_dq_callback, }; /* Table of supported USB devices */ #define ST(sensor, type) \ .driver_info = (SENSOR_ ## sensor << 8) \ | (type) static const struct usb_device_id device_table[] = { {USB_DEVICE(0x041e, 0x4038), ST(MI0360, 0)}, {USB_DEVICE(0x041e, 0x403c), ST(LZ24BP, 0)}, {USB_DEVICE(0x041e, 0x403d), ST(LZ24BP, 0)}, {USB_DEVICE(0x041e, 0x4041), ST(LZ24BP, Creative_live_motion)}, {USB_DEVICE(0x2770, 0x930b), ST(MI0360, 0)}, {USB_DEVICE(0x2770, 0x930c), ST(MI0360, 0)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
56 57 70 2 70 2 1 36 33 1 15 12 2 39 9 60 48 13 4 5 52 37 35 20 52 36 28 53 14 4 53 2 18 1 10 4 3 69 70 70 70 45 10 1 4 1 29 1 26 11 52 52 52 52 70 63 33 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 // SPDX-License-Identifier: GPL-2.0 /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * The IP fragmentation functionality. * * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox <alan@lxorguk.ukuu.org.uk> * * Fixes: * Alan Cox : Split from ip.c , see ip_input.c for history. * David S. Miller : Begin massive cleanup... * Andi Kleen : Add sysctls. * xxxx : Overlapfrag bug. * Ultima : ip_expire() kernel panic. * Bill Hawes : Frag accounting and evictor fixes. * John McDonald : 0 length frag bug. * Alexey Kuznetsov: SMP races, threading, cleanup. * Patrick McHardy : LRU queue of frag heads for evictor. */ #define pr_fmt(fmt) "IPv4: " fmt #include <linux/compiler.h> #include <linux/module.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/jiffies.h> #include <linux/skbuff.h> #include <linux/list.h> #include <linux/ip.h> #include <linux/icmp.h> #include <linux/netdevice.h> #include <linux/jhash.h> #include <linux/random.h> #include <linux/slab.h> #include <net/route.h> #include <net/dst.h> #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> #include <net/checksum.h> #include <net/inetpeer.h> #include <net/inet_frag.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/inet.h> #include <linux/netfilter_ipv4.h> #include <net/inet_ecn.h> #include <net/l3mdev.h> /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c * as well. Or notify me, at least. --ANK */ static const char ip_frag_cache_name[] = "ip4-frags"; /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { struct inet_frag_queue q; u8 ecn; /* RFC3168 support */ u16 max_df_size; /* largest frag with DF set seen */ int iif; unsigned int rid; struct inet_peer *peer; }; static u8 ip4_frag_ecn(u8 tos) { return 1 << (tos & INET_ECN_MASK); } static struct inet_frags ip4_frags; static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, struct sk_buff *prev_tail, struct net_device *dev, int *refs); static void ip4_frag_init(struct inet_frag_queue *q, const void *a) { struct ipq *qp = container_of(q, struct ipq, q); const struct frag_v4_compare_key *key = a; struct net *net = q->fqdir->net; struct inet_peer *p = NULL; q->key.v4 = *key; qp->ecn = 0; if (q->fqdir->max_dist) { rcu_read_lock(); p = inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif); if (p && !refcount_inc_not_zero(&p->refcnt)) p = NULL; rcu_read_unlock(); } qp->peer = p; } static void ip4_frag_free(struct inet_frag_queue *q) { struct ipq *qp; qp = container_of(q, struct ipq, q); if (qp->peer) inet_putpeer(qp->peer); } static bool frag_expire_skip_icmp(u32 user) { return user == IP_DEFRAG_AF_PACKET || ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN, __IP_DEFRAG_CONNTRACK_IN_END) || ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN, __IP_DEFRAG_CONNTRACK_BRIDGE_IN); } /* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ static void ip_expire(struct timer_list *t) { enum skb_drop_reason reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT; struct inet_frag_queue *frag = timer_container_of(frag, t, timer); const struct iphdr *iph; struct sk_buff *head = NULL; struct net *net; struct ipq *qp; int refs = 1; qp = container_of(frag, struct ipq, q); net = qp->q.fqdir->net; rcu_read_lock(); /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */ if (READ_ONCE(qp->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) goto out; qp->q.flags |= INET_FRAG_DROP; inet_frag_kill(&qp->q, &refs); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); if (!(qp->q.flags & INET_FRAG_FIRST_IN)) goto out; /* sk_buff::dev and sk_buff::rbnode are unionized. So we * pull the head out of the tree in order to be able to * deal with head->dev. */ head = inet_frag_pull_head(&qp->q); if (!head) goto out; head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out; /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); reason = ip_route_input_noref(head, iph->daddr, iph->saddr, ip4h_dscp(iph), head->dev); if (reason) goto out; /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT; if (frag_expire_skip_icmp(qp->q.key.v4.user) && (skb_rtable(head)->rt_type != RTN_LOCAL)) goto out; spin_unlock(&qp->q.lock); icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); goto out_rcu_unlock; out: spin_unlock(&qp->q.lock); out_rcu_unlock: rcu_read_unlock(); kfree_skb_reason(head, reason); inet_frag_putn(&qp->q, refs); } /* Find the correct entry in the "incomplete datagrams" queue for * this IP datagram, and create new one, if nothing is found. */ static struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user, int vif) { struct frag_v4_compare_key key = { .saddr = iph->saddr, .daddr = iph->daddr, .user = user, .vif = vif, .id = iph->id, .protocol = iph->protocol, }; struct inet_frag_queue *q; q = inet_frag_find(net->ipv4.fqdir, &key); if (!q) return NULL; return container_of(q, struct ipq, q); } /* Is the fragment too far ahead to be part of ipq? */ static int ip_frag_too_far(struct ipq *qp) { struct inet_peer *peer = qp->peer; unsigned int max = qp->q.fqdir->max_dist; unsigned int start, end; int rc; if (!peer || !max) return 0; start = qp->rid; end = atomic_inc_return(&peer->rid); qp->rid = end; rc = qp->q.fragments_tail && (end - start) > max; if (rc) __IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS); return rc; } static int ip_frag_reinit(struct ipq *qp) { unsigned int sum_truesize = 0; if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) { refcount_inc(&qp->q.refcnt); return -ETIMEDOUT; } sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments, SKB_DROP_REASON_FRAG_TOO_FAR); sub_frag_mem_limit(qp->q.fqdir, sum_truesize); qp->q.flags = 0; qp->q.len = 0; qp->q.meat = 0; qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; qp->q.last_run_head = NULL; qp->iif = 0; qp->ecn = 0; return 0; } /* Add new segment to existing queue. */ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb, int *refs) { struct net *net = qp->q.fqdir->net; int ihl, end, flags, offset; struct sk_buff *prev_tail; struct net_device *dev; unsigned int fragsize; int err = -ENOENT; SKB_DR(reason); u8 ecn; /* If reassembly is already done, @skb must be a duplicate frag. */ if (qp->q.flags & INET_FRAG_COMPLETE) { SKB_DR_SET(reason, DUP_FRAG); goto err; } if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && unlikely(ip_frag_too_far(qp)) && unlikely(err = ip_frag_reinit(qp))) { inet_frag_kill(&qp->q, refs); goto err; } ecn = ip4_frag_ecn(ip_hdr(skb)->tos); offset = ntohs(ip_hdr(skb)->frag_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; offset <<= 3; /* offset is in 8-byte chunks */ ihl = ip_hdrlen(skb); /* Determine the position of this fragment. */ end = offset + skb->len - skb_network_offset(skb) - ihl; err = -EINVAL; /* Is this the final fragment? */ if ((flags & IP_MF) == 0) { /* If we already have some bits beyond end * or have different end, the segment is corrupted. */ if (end < qp->q.len || ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len)) goto discard_qp; qp->q.flags |= INET_FRAG_LAST_IN; qp->q.len = end; } else { if (end&7) { end &= ~7; if (skb->ip_summed != CHECKSUM_UNNECESSARY) skb->ip_summed = CHECKSUM_NONE; } if (end > qp->q.len) { /* Some bits beyond end -> corruption. */ if (qp->q.flags & INET_FRAG_LAST_IN) goto discard_qp; qp->q.len = end; } } if (end == offset) goto discard_qp; err = -ENOMEM; if (!pskb_pull(skb, skb_network_offset(skb) + ihl)) goto discard_qp; err = pskb_trim_rcsum(skb, end - offset); if (err) goto discard_qp; /* Note : skb->rbnode and skb->dev share the same location. */ dev = skb->dev; /* Makes sure compiler wont do silly aliasing games */ barrier(); prev_tail = qp->q.fragments_tail; err = inet_frag_queue_insert(&qp->q, skb, offset, end); if (err) goto insert_error; if (dev) qp->iif = dev->ifindex; qp->q.stamp = skb->tstamp; qp->q.tstamp_type = skb->tstamp_type; qp->q.meat += skb->len; qp->ecn |= ecn; add_frag_mem_limit(qp->q.fqdir, skb->truesize); if (offset == 0) qp->q.flags |= INET_FRAG_FIRST_IN; fragsize = skb->len + ihl; if (fragsize > qp->q.max_size) qp->q.max_size = fragsize; if (ip_hdr(skb)->frag_off & htons(IP_DF) && fragsize > qp->max_df_size) qp->max_df_size = fragsize; if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && qp->q.meat == qp->q.len) { unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; err = ip_frag_reasm(qp, skb, prev_tail, dev, refs); skb->_skb_refdst = orefdst; if (err) inet_frag_kill(&qp->q, refs); return err; } skb_dst_drop(skb); skb_orphan(skb); return -EINPROGRESS; insert_error: if (err == IPFRAG_DUP) { SKB_DR_SET(reason, DUP_FRAG); err = -EINVAL; goto err; } err = -EINVAL; __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); discard_qp: inet_frag_kill(&qp->q, refs); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); err: kfree_skb_reason(skb, reason); return err; } static bool ip_frag_coalesce_ok(const struct ipq *qp) { return qp->q.key.v4.user == IP_DEFRAG_LOCAL_DELIVER; } /* Build a new IP datagram from all its fragments. */ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, struct sk_buff *prev_tail, struct net_device *dev, int *refs) { struct net *net = qp->q.fqdir->net; struct iphdr *iph; void *reasm_data; int len, err; u8 ecn; inet_frag_kill(&qp->q, refs); ecn = ip_frag_ecn_table[qp->ecn]; if (unlikely(ecn == 0xff)) { err = -EINVAL; goto out_fail; } /* Make the one we just received the head. */ reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail); if (!reasm_data) goto out_nomem; len = ip_hdrlen(skb) + qp->q.len; err = -E2BIG; if (len > 65535) goto out_oversize; inet_frag_reasm_finish(&qp->q, skb, reasm_data, ip_frag_coalesce_ok(qp)); skb->dev = dev; IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size); iph = ip_hdr(skb); iph->tot_len = htons(len); iph->tos |= ecn; /* When we set IP_DF on a refragmented skb we must also force a * call to ip_fragment to avoid forwarding a DF-skb of size s while * original sender only sent fragments of size f (where f < s). * * We only set DF/IPSKB_FRAG_PMTU if such DF fragment was the largest * frag seen to avoid sending tiny DF-fragments in case skb was built * from one very small df-fragment and one large non-df frag. */ if (qp->max_df_size == qp->q.max_size) { IPCB(skb)->flags |= IPSKB_FRAG_PMTU; iph->frag_off = htons(IP_DF); } else { iph->frag_off = 0; } ip_send_check(iph); __IP_INC_STATS(net, IPSTATS_MIB_REASMOKS); qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; qp->q.last_run_head = NULL; return 0; out_nomem: net_dbg_ratelimited("queue_glue: no memory for gluing queue %p\n", qp); err = -ENOMEM; goto out_fail; out_oversize: net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr); out_fail: __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); return err; } /* Process an incoming IP datagram fragment. */ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) { struct net_device *dev; struct ipq *qp; int vif; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); /* Lookup (or create) queue header */ rcu_read_lock(); dev = skb->dev ? : skb_dst_dev_rcu(skb); vif = l3mdev_master_ifindex_rcu(dev); qp = ip_find(net, ip_hdr(skb), user, vif); if (qp) { int ret, refs = 0; spin_lock(&qp->q.lock); ret = ip_frag_queue(qp, skb, &refs); spin_unlock(&qp->q.lock); rcu_read_unlock(); inet_frag_putn(&qp->q, refs); return ret; } rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -ENOMEM; } EXPORT_SYMBOL(ip_defrag); struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) { struct iphdr iph; int netoff; u32 len; if (skb->protocol != htons(ETH_P_IP)) return skb; netoff = skb_network_offset(skb); if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0) return skb; if (iph.ihl < 5 || iph.version != 4) return skb; len = ntohs(iph.tot_len); if (skb->len < netoff + len || len < (iph.ihl * 4)) return skb; if (ip_is_fragment(&iph)) { skb = skb_share_check(skb, GFP_ATOMIC); if (skb) { if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) { kfree_skb(skb); return NULL; } if (pskb_trim_rcsum(skb, netoff + len)) { kfree_skb(skb); return NULL; } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(net, skb, user)) return NULL; skb_clear_hash(skb); } } return skb; } EXPORT_SYMBOL(ip_check_defrag); #ifdef CONFIG_SYSCTL static int dist_min; static struct ctl_table ip4_frags_ns_ctl_table[] = { { .procname = "ipfrag_high_thresh", .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, { .procname = "ipfrag_low_thresh", .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, { .procname = "ipfrag_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ipfrag_max_dist", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &dist_min, }, }; /* secret interval has been deprecated */ static int ip4_frags_secret_interval_unused; static struct ctl_table ip4_frags_ctl_table[] = { { .procname = "ipfrag_secret_interval", .data = &ip4_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, }; static int __net_init ip4_frags_ns_ctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; table = ip4_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL); if (!table) goto err_alloc; } table[0].data = &net->ipv4.fqdir->high_thresh; table[0].extra1 = &net->ipv4.fqdir->low_thresh; table[1].data = &net->ipv4.fqdir->low_thresh; table[1].extra2 = &net->ipv4.fqdir->high_thresh; table[2].data = &net->ipv4.fqdir->timeout; table[3].data = &net->ipv4.fqdir->max_dist; hdr = register_net_sysctl_sz(net, "net/ipv4", table, ARRAY_SIZE(ip4_frags_ns_ctl_table)); if (!hdr) goto err_reg; net->ipv4.frags_hdr = hdr; return 0; err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net) { const struct ctl_table *table; table = net->ipv4.frags_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.frags_hdr); kfree(table); } static void __init ip4_frags_ctl_register(void) { register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table); } #else static int ip4_frags_ns_ctl_register(struct net *net) { return 0; } static void ip4_frags_ns_ctl_unregister(struct net *net) { } static void __init ip4_frags_ctl_register(void) { } #endif static int __net_init ipv4_frags_init_net(struct net *net) { int res; res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net); if (res < 0) return res; /* Fragment cache limits. * * The fragment memory accounting code, (tries to) account for * the real memory usage, by measuring both the size of frag * queue struct (inet_frag_queue (ipv4:ipq/ipv6:frag_queue)) * and the SKB's truesize. * * A 64K fragment consumes 129736 bytes (44*2944)+200 * (1500 truesize == 2944, sizeof(struct ipq) == 200) * * We will commit 4MB at one time. Should we cross that limit * we will prune down to 3MB, making room for approx 8 big 64K * fragments 8x128k. */ net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024; net->ipv4.fqdir->low_thresh = 3 * 1024 * 1024; /* * Important NOTE! Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival * by TTL. */ net->ipv4.fqdir->timeout = IP_FRAG_TIME; net->ipv4.fqdir->max_dist = 64; res = ip4_frags_ns_ctl_register(net); if (res < 0) fqdir_exit(net->ipv4.fqdir); return res; } static void __net_exit ipv4_frags_pre_exit_net(struct net *net) { fqdir_pre_exit(net->ipv4.fqdir); } static void __net_exit ipv4_frags_exit_net(struct net *net) { ip4_frags_ns_ctl_unregister(net); fqdir_exit(net->ipv4.fqdir); } static struct pernet_operations ip4_frags_ops = { .init = ipv4_frags_init_net, .pre_exit = ipv4_frags_pre_exit_net, .exit = ipv4_frags_exit_net, }; static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed) { return jhash2(data, sizeof(struct frag_v4_compare_key) / sizeof(u32), seed); } static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed) { const struct inet_frag_queue *fq = data; return jhash2((const u32 *)&fq->key.v4, sizeof(struct frag_v4_compare_key) / sizeof(u32), seed); } static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) { const struct frag_v4_compare_key *key = arg->key; const struct inet_frag_queue *fq = ptr; return !!memcmp(&fq->key, key, sizeof(*key)); } static const struct rhashtable_params ip4_rhash_params = { .head_offset = offsetof(struct inet_frag_queue, node), .key_offset = offsetof(struct inet_frag_queue, key), .key_len = sizeof(struct frag_v4_compare_key), .hashfn = ip4_key_hashfn, .obj_hashfn = ip4_obj_hashfn, .obj_cmpfn = ip4_obj_cmpfn, .automatic_shrinking = true, }; void __init ipfrag_init(void) { ip4_frags.constructor = ip4_frag_init; ip4_frags.destructor = ip4_frag_free; ip4_frags.qsize = sizeof(struct ipq); ip4_frags.frag_expire = ip_expire; ip4_frags.frags_cache_name = ip_frag_cache_name; ip4_frags.rhash_params = ip4_rhash_params; if (inet_frags_init(&ip4_frags)) panic("IP: failed to allocate ip4_frags cache\n"); ip4_frags_ctl_register(); register_pernet_subsys(&ip4_frags_ops); }
160 159 89 3 66 3 106 94 69 110 56 82 32 39 13 47 154 91 104 161 152 1 1 1 4 3 7 244 214 82 178 1 4 18 45 123 112 50 50 49 5 3 149 134 28 136 29 22 5 5 8 20 300 105 43 28 30 106 49 1 34 100 38 127 126 101 101 3 19 29 45 28 57 86 88 81 38 29 29 166 34 153 107 23 106 38 19 55 97 153 86 103 100 135 158 15 18 81 149 19 150 149 135 134 76 14 254 30 175 2 32 2 13 23 2 58 58 4 49 47 49 1 10 49 1 1 20 6 31 49 19 11 34 20 13 48 47 11 2 9 11 46 46 3 4 1 7 8 11 46 1 40 36 13 46 46 1 46 23 23 20 22 1 23 2 23 3 23 8 23 23 30 2 2 48 8 38 9 27 27 8 8 8 2 2 8 22 5 5 48 48 29 48 2 1 48 23 48 48 46 18 46 46 44 22 46 46 46 27 2 2 61 60 43 48 60 8 58 58 7 8 1 20 57 11 20 44 44 51 47 20 44 8 42 27 27 27 11 45 46 46 58 58 58 58 57 61 60 61 61 6 6 4 4 4 4 4 4 4 4 2 384 45 2 9 42 1 4 1 5 4 1 5 5 5 5 1 4 1 1 1 5 5 5 5 10 38 37 33 7 18 45 1 1 1 1 1 33 32 1 32 32 1 30 2 32 61 6 6 135 35 29 31 5 31 31 12 17 3 16 20 100 18 98 40 34 15 41 41 41 40 1 6 37 1 2 2 2 1 2 45 45 45 2 42 43 1 4 6 6 5 5 5 23 1 1 1 15 4 56 6 56 10 63 63 61 1 10 62 43 49 7 4 2 27 2 1 29 55 41 4 23 45 45 45 1 6 5 5 6 6 54 4 2 6 56 59 41 2 44 241 222 279 14 167 34 240 241 242 134 318 185 255 118 255 76 209 6 23 279 6 5 232 234 199 23 8 35 30 224 39 4 231 37 234 35 152 126 23 234 232 29 232 67 239 184 198 182 70 48 175 281 46 235 4 229 4 180 44 44 240 283 282 283 200 184 185 186 13 24 4 56 25 35 231 134 257 229 129 45 280 283 222 284 200 158 114 80 80 106 49 2 18 15 49 23 40 36 52 18 18 18 18 56 56 131 215 16 11 2 2 4 4 4 6 1 277 12 280 53 281 1 286 193 234 286 74 249 24 261 162 284 281 61 284 1 284 48 286 285 283 281 283 281 16 283 59 29 180 234 180 242 282 283 283 41 40 41 1 40 40 1 3 3 3 6 5 35 5 1 2 378 375 372 42 181 5 12 5 83 130 2 82 127 3 4 129 141 82 133 6 2 61 37 8 10 8 49 6 6 205 27 144 188 26 126 230 225 8 7 3 16 4 18 2 8 7 2 4 2 1 25 256 23 8 13 4 111 103 104 80 22 83 4 18 102 38 2 36 1 37 8 28 28 2 19 30 9 11 29 82 13 2 56 2 18 76 35 28 8 8 8 17 17 8 9 11 2 8 11 14 33 2 31 28 17 19 11 11 1 19 20 23 8 8 132 21 131 95 87 85 11 20 145 145 35 25 41 2 1 42 34 24 35 7 196 132 116 28 17 61 14 5 43 12 14 33 14 15 5 7 14 33 15 4 33 17 5 53 58 59 198 170 117 84 181 181 20 1 12 7 2 16 5 177 179 242 241 128 211 211 166 163 2 7 30 133 135 31 145 43 24 29 159 31 165 146 78 8 157 72 72 75 3 13 13 61 27 14 21 476 463 27 9 27 10 20 8 3 3 21 14 21 20 17 21 3 21 19 18 17 20 14 26 15 12 14 12 14 20 8 10 12 11 12 2 27 27 27 17 11 20 2 88 91 83 91 326 311 91 86 45 108 276 200 196 2 94 201 164 94 5 43 47 22 170 190 66 66 2 66 63 2 2 11 273 249 65 250 56 53 3 262 264 1 227 7 1 4 2 2 262 3 21 18 36 3 18 212 80 4 77 12 7 213 214 155 74 181 28 83 81 1 25 25 25 70 7 37 4 38 33 11 45 188 189 186 168 168 168 152 152 152 152 152 74 26 48 70 3 46 28 25 25 24 1 1 24 1 1 24 45 3 126 157 56 3 3 128 82 47 1 48 48 48 48 23 25 48 1 10 1 11 35 2 79 5 1 4 78 82 371 373 4 21 1 6 107 209 36 118 16 112 4 78 2 76 78 78 2 76 78 75 3 43 41 2 1 1 1 1 35 5 108 2 29 10 4 105 105 86 87 66 20 1 8 102 102 102 102 7 7 6 83 1 1 1 101 102 7 102 100 31 79 7 7 83 87 84 7 1 6 87 84 84 7 84 31 31 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 // SPDX-License-Identifier: GPL-2.0 /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Implementation of the Transmission Control Protocol(TCP). * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Corey Minyard <wf-rch!minyard@relay.EU.net> * Florian La Roche, <flla@stud.uni-sb.de> * Charles Hedrick, <hedrick@klinzhai.rutgers.edu> * Linus Torvalds, <torvalds@cs.helsinki.fi> * Alan Cox, <gw4pts@gw4pts.ampr.org> * Matthew Dillon, <dillon@apollo.west.oic.com> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Jorge Cwik, <jorge@laser.satlink.net> */ /* * Changes: * Pedro Roque : Fast Retransmit/Recovery. * Two receive queues. * Retransmit queue handled by TCP. * Better retransmit timer handling. * New congestion avoidance. * Header prediction. * Variable renaming. * * Eric : Fast Retransmit. * Randy Scott : MSS option defines. * Eric Schenk : Fixes to slow start algorithm. * Eric Schenk : Yet another double ACK bug. * Eric Schenk : Delayed ACK bug fixes. * Eric Schenk : Floyd style fast retrans war avoidance. * David S. Miller : Don't allow zero congestion window. * Eric Schenk : Fix retransmitter so that it sends * next packet on ack of previous packet. * Andi Kleen : Moved open_request checking here * and process RSTs for open_requests. * Andi Kleen : Better prune_queue, and other fixes. * Andrey Savochkin: Fix RTT measurements in the presence of * timestamps. * Andrey Savochkin: Check sequence numbers correctly when * removing SACKs due to in sequence incoming * data segments. * Andi Kleen: Make sure we never ack data there is not * enough room for. Also make this condition * a fatal error if it might still happen. * Andi Kleen: Add tcp_measure_rcv_mss to make * connections with MSS<min(MTU,ann. MSS) * work without delayed acks. * Andi Kleen: Process packets with PSH set in the * fast path. * J Hadi Salim: ECN support * Andrei Gurtov, * Pasi Sarolahti, * Panu Kuhlberg: Experimental audit of TCP (re)transmission * engine. Lots of bugs are found. * Pasi Sarolahti: F-RTO for dealing with spurious RTOs */ #define pr_fmt(fmt) "TCP: " fmt #include <linux/mm.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/sysctl.h> #include <linux/kernel.h> #include <linux/prefetch.h> #include <linux/bitops.h> #include <net/dst.h> #include <net/tcp.h> #include <net/tcp_ecn.h> #include <net/proto_memory.h> #include <net/inet_common.h> #include <linux/ipsec.h> #include <linux/unaligned.h> #include <linux/errqueue.h> #include <trace/events/tcp.h> #include <linux/jump_label_ratelimit.h> #include <net/busy_poll.h> #include <net/mptcp.h> int sysctl_tcp_max_orphans __read_mostly = NR_FILE; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ #define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ #define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */ #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ #define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_SET_XMIT_TIMER 0x1000 /* Set TLP or RTO timer */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ #define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */ #define FLAG_DSACK_TLP 0x20000 /* DSACK for tail loss probe */ #define FLAG_TS_PROGRESS 0x40000 /* Positive timestamp delta */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) #define REXMIT_NONE 0 /* no loss recovery to do */ #define REXMIT_LOST 1 /* retransmit packets marked lost */ #define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */ #if IS_ENABLED(CONFIG_TLS_DEVICE) static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ); void clean_acked_data_enable(struct tcp_sock *tp, void (*cad)(struct sock *sk, u32 ack_seq)) { tp->tcp_clean_acked = cad; static_branch_deferred_inc(&clean_acked_data_enabled); } EXPORT_SYMBOL_GPL(clean_acked_data_enable); void clean_acked_data_disable(struct tcp_sock *tp) { static_branch_slow_dec_deferred(&clean_acked_data_enabled); tp->tcp_clean_acked = NULL; } EXPORT_SYMBOL_GPL(clean_acked_data_disable); void clean_acked_data_flush(void) { static_key_deferred_flush(&clean_acked_data_enabled); } EXPORT_SYMBOL_GPL(clean_acked_data_flush); #endif #ifdef CONFIG_CGROUP_BPF static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb) { bool unknown_opt = tcp_sk(sk)->rx_opt.saw_unknown && BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG); bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG); struct bpf_sock_ops_kern sock_ops; if (likely(!unknown_opt && !parse_all_opt)) return; /* The skb will be handled in the * bpf_skops_established() or * bpf_skops_write_hdr_opt(). */ switch (sk->sk_state) { case TCP_SYN_RECV: case TCP_SYN_SENT: case TCP_LISTEN: return; } sock_owned_by_me(sk); memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp)); sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB; sock_ops.is_fullsock = 1; sock_ops.is_locked_tcp_sock = 1; sock_ops.sk = sk; bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb)); BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); } static void bpf_skops_established(struct sock *sk, int bpf_op, struct sk_buff *skb) { struct bpf_sock_ops_kern sock_ops; sock_owned_by_me(sk); memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp)); sock_ops.op = bpf_op; sock_ops.is_fullsock = 1; sock_ops.is_locked_tcp_sock = 1; sock_ops.sk = sk; /* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */ if (skb) bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb)); BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); } #else static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb) { } static void bpf_skops_established(struct sock *sk, int bpf_op, struct sk_buff *skb) { } #endif static __cold void tcp_gro_dev_warn(const struct sock *sk, const struct sk_buff *skb, unsigned int len) { struct net_device *dev; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); if (!dev || len >= READ_ONCE(dev->mtu)) pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", dev ? dev->name : "Unknown driver"); rcu_read_unlock(); } /* Adapt the MSS value used to make delayed ack decision to the * real world. */ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) { struct inet_connection_sock *icsk = inet_csk(sk); const unsigned int lss = icsk->icsk_ack.last_seg_size; unsigned int len; icsk->icsk_ack.last_seg_size = 0; /* skb->len may jitter because of SACKs, even if peer * sends good full-sized frames. */ len = skb_shinfo(skb)->gso_size ? : skb->len; if (len >= icsk->icsk_ack.rcv_mss) { /* Note: divides are still a bit expensive. * For the moment, only adjust scaling_ratio * when we update icsk_ack.rcv_mss. */ if (unlikely(len != icsk->icsk_ack.rcv_mss)) { u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE; u8 old_ratio = tcp_sk(sk)->scaling_ratio; do_div(val, skb->truesize); tcp_sk(sk)->scaling_ratio = val ? val : 1; if (old_ratio != tcp_sk(sk)->scaling_ratio) { struct tcp_sock *tp = tcp_sk(sk); val = tcp_win_from_space(sk, sk->sk_rcvbuf); tcp_set_window_clamp(sk, val); if (tp->window_clamp < tp->rcvq_space.space) tp->rcvq_space.space = tp->window_clamp; } } icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); /* Account for possibly-removed options */ DO_ONCE_LITE_IF(len > icsk->icsk_ack.rcv_mss + MAX_TCP_OPTION_SPACE, tcp_gro_dev_warn, sk, skb, len); /* If the skb has a len of exactly 1*MSS and has the PSH bit * set then it is likely the end of an application write. So * more data may not be arriving soon, and yet the data sender * may be waiting for an ACK if cwnd-bound or using TX zero * copy. So we set ICSK_ACK_PUSHED here so that * tcp_cleanup_rbuf() will send an ACK immediately if the app * reads all of the data and is not ping-pong. If len > MSS * then this logic does not matter (and does not hurt) because * tcp_cleanup_rbuf() will always ACK immediately if the app * reads data and there is more than an MSS of unACKed data. */ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH) icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. * * "len" is invariant segment length, including TCP header. */ len += skb->data - skb_transport_header(skb); if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) || /* If PSH is not set, packet should be * full sized, provided peer TCP is not badly broken. * This observation (if it is correct 8)) allows * to handle super-low mtu links fairly. */ (len >= TCP_MIN_MSS + sizeof(struct tcphdr) && !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) { /* Subtract also invariant (if peer is RFC compliant), * tcp header plus fixed timestamp option length. * Resulting "len" is MSS free of SACK jitter. */ len -= tcp_sk(sk)->tcp_header_len; icsk->icsk_ack.last_seg_size = len; if (len == lss) { icsk->icsk_ack.rcv_mss = len; return; } } if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2; icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } } static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks) { struct inet_connection_sock *icsk = inet_csk(sk); unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); if (quickacks == 0) quickacks = 2; quickacks = min(quickacks, max_quickacks); if (quickacks > icsk->icsk_ack.quick) icsk->icsk_ack.quick = quickacks; } static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) { struct inet_connection_sock *icsk = inet_csk(sk); tcp_incr_quickack(sk, max_quickacks); inet_csk_exit_pingpong_mode(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; } /* Send ACKs quickly, if "quick" count is not exhausted * and the session is not interactive. */ static bool tcp_in_quickack_mode(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); return icsk->icsk_ack.dst_quick_ack || (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk)); } static void tcp_data_ecn_check(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); if (tcp_ecn_disabled(tp)) return; switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) { case INET_ECN_NOT_ECT: /* Funny extension: if ECT is not set on a segment, * and we already seen ECT on a previous segment, * it is probably a retransmit. */ if (tp->ecn_flags & TCP_ECN_SEEN) tcp_enter_quickack_mode(sk, 2); break; case INET_ECN_CE: if (tcp_ca_needs_ecn(sk)) tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR) && tcp_ecn_mode_rfc3168(tp)) { /* Better not delay acks, sender can have a very low cwnd */ tcp_enter_quickack_mode(sk, 2); tp->ecn_flags |= TCP_ECN_DEMAND_CWR; } /* As for RFC3168 ECN, the TCP_ECN_SEEN flag is set by * tcp_data_ecn_check() when the ECN codepoint of * received TCP data contains ECT(0), ECT(1), or CE. */ if (!tcp_ecn_mode_rfc3168(tp)) break; tp->ecn_flags |= TCP_ECN_SEEN; break; default: if (tcp_ca_needs_ecn(sk)) tcp_ca_event(sk, CA_EVENT_ECN_NO_CE); if (!tcp_ecn_mode_rfc3168(tp)) break; tp->ecn_flags |= TCP_ECN_SEEN; break; } } /* Returns true if the byte counters can be used */ static bool tcp_accecn_process_option(struct tcp_sock *tp, const struct sk_buff *skb, u32 delivered_bytes, int flag) { u8 estimate_ecnfield = tp->est_ecnfield; bool ambiguous_ecn_bytes_incr = false; bool first_changed = false; unsigned int optlen; bool order1, res; unsigned int i; u8 *ptr; if (tcp_accecn_opt_fail_recv(tp)) return false; if (!(flag & FLAG_SLOWPATH) || !tp->rx_opt.accecn) { if (!tp->saw_accecn_opt) { /* Too late to enable after this point due to * potential counter wraps */ if (tp->bytes_sent >= (1 << 23) - 1) { u8 saw_opt = TCP_ACCECN_OPT_FAIL_SEEN; tcp_accecn_saw_opt_fail_recv(tp, saw_opt); } return false; } if (estimate_ecnfield) { u8 ecnfield = estimate_ecnfield - 1; tp->delivered_ecn_bytes[ecnfield] += delivered_bytes; return true; } return false; } ptr = skb_transport_header(skb) + tp->rx_opt.accecn; optlen = ptr[1] - 2; if (WARN_ON_ONCE(ptr[0] != TCPOPT_ACCECN0 && ptr[0] != TCPOPT_ACCECN1)) return false; order1 = (ptr[0] == TCPOPT_ACCECN1); ptr += 2; if (tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) { tp->saw_accecn_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn); if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL_SEEN) tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_RECV); } res = !!estimate_ecnfield; for (i = 0; i < 3; i++) { u32 init_offset; u8 ecnfield; s32 delta; u32 *cnt; if (optlen < TCPOLEN_ACCECN_PERFIELD) break; ecnfield = tcp_accecn_optfield_to_ecnfield(i, order1); init_offset = tcp_accecn_field_init_offset(ecnfield); cnt = &tp->delivered_ecn_bytes[ecnfield - 1]; delta = tcp_update_ecn_bytes(cnt, ptr, init_offset); if (delta && delta < 0) { res = false; ambiguous_ecn_bytes_incr = true; } if (delta && ecnfield != estimate_ecnfield) { if (!first_changed) { tp->est_ecnfield = ecnfield; first_changed = true; } else { res = false; ambiguous_ecn_bytes_incr = true; } } optlen -= TCPOLEN_ACCECN_PERFIELD; ptr += TCPOLEN_ACCECN_PERFIELD; } if (ambiguous_ecn_bytes_incr) tp->est_ecnfield = 0; return res; } static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count) { tp->delivered_ce += ecn_count; } /* Updates the delivered and delivered_ce counts */ static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered, bool ece_ack) { tp->delivered += delivered; if (tcp_ecn_mode_rfc3168(tp) && ece_ack) tcp_count_delivered_ce(tp, delivered); } /* Returns the ECN CE delta */ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb, u32 delivered_pkts, u32 delivered_bytes, int flag) { u32 old_ceb = tcp_sk(sk)->delivered_ecn_bytes[INET_ECN_CE - 1]; const struct tcphdr *th = tcp_hdr(skb); struct tcp_sock *tp = tcp_sk(sk); u32 delta, safe_delta, d_ceb; bool opt_deltas_valid; u32 corrected_ace; /* Reordered ACK or uncertain due to lack of data to send and ts */ if (!(flag & (FLAG_FORWARD_PROGRESS | FLAG_TS_PROGRESS))) return 0; opt_deltas_valid = tcp_accecn_process_option(tp, skb, delivered_bytes, flag); if (!(flag & FLAG_SLOWPATH)) { /* AccECN counter might overflow on large ACKs */ if (delivered_pkts <= TCP_ACCECN_CEP_ACE_MASK) return 0; } /* ACE field is not available during handshake */ if (flag & FLAG_SYN_ACKED) return 0; if (tp->received_ce_pending >= TCP_ACCECN_ACE_MAX_DELTA) inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; corrected_ace = tcp_accecn_ace(th) - TCP_ACCECN_CEP_INIT_OFFSET; delta = (corrected_ace - tp->delivered_ce) & TCP_ACCECN_CEP_ACE_MASK; if (delivered_pkts <= TCP_ACCECN_CEP_ACE_MASK) return delta; safe_delta = delivered_pkts - ((delivered_pkts - delta) & TCP_ACCECN_CEP_ACE_MASK); if (opt_deltas_valid) { d_ceb = tp->delivered_ecn_bytes[INET_ECN_CE - 1] - old_ceb; if (!d_ceb) return delta; if ((delivered_pkts >= (TCP_ACCECN_CEP_ACE_MASK + 1) * 2) && (tcp_is_sack(tp) || ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_CWR)))) { u32 est_d_cep; if (delivered_bytes <= d_ceb) return safe_delta; est_d_cep = DIV_ROUND_UP_ULL((u64)d_ceb * delivered_pkts, delivered_bytes); return min(safe_delta, delta + (est_d_cep & ~TCP_ACCECN_CEP_ACE_MASK)); } if (d_ceb > delta * tp->mss_cache) return safe_delta; if (d_ceb < safe_delta * tp->mss_cache >> TCP_ACCECN_SAFETY_SHIFT) return delta; } return safe_delta; } static u32 tcp_accecn_process(struct sock *sk, const struct sk_buff *skb, u32 delivered_pkts, u32 delivered_bytes, int *flag) { struct tcp_sock *tp = tcp_sk(sk); u32 delta; delta = __tcp_accecn_process(sk, skb, delivered_pkts, delivered_bytes, *flag); if (delta > 0) { tcp_count_delivered_ce(tp, delta); *flag |= FLAG_ECE; /* Recalculate header predictor */ if (tp->pred_flags) tcp_fast_path_on(tp); } return delta; } /* Buffer size and advertised window tuning. * * 1. Tuning sk->sk_sndbuf, when connection enters established state. */ static void tcp_sndbuf_expand(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; int sndmem, per_mss; u32 nr_segs; /* Worst case is non GSO/TSO : each frame consumes one skb * and skb->head is kmalloced using power of two area of memory */ per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + MAX_TCP_HEADER + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); per_mss = roundup_pow_of_two(per_mss) + SKB_DATA_ALIGN(sizeof(struct sk_buff)); nr_segs = max_t(u32, TCP_INIT_CWND, tcp_snd_cwnd(tp)); nr_segs = max_t(u32, nr_segs, tp->reordering + 1); /* Fast Recovery (RFC 5681 3.2) : * Cubic needs 1.7 factor, rounded to 2 to include * extra cushion (application might react slowly to EPOLLOUT) */ sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2; sndmem *= nr_segs * per_mss; if (sk->sk_sndbuf < sndmem) WRITE_ONCE(sk->sk_sndbuf, min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2]))); } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) * * All tcp_full_space() is split to two parts: "network" buffer, allocated * forward and advertised in receiver window (tp->rcv_wnd) and * "application buffer", required to isolate scheduling/application * latencies from network. * window_clamp is maximal advertised window. It can be less than * tcp_full_space(), in this case tcp_full_space() - window_clamp * is reserved for "application" buffer. The less window_clamp is * the smoother our behaviour from viewpoint of network, but the lower * throughput and the higher sensitivity of the connection to losses. 8) * * rcv_ssthresh is more strict window_clamp used at "slow start" * phase to predict further behaviour of this connection. * It is used for two goals: * - to enforce header prediction at sender, even when application * requires some significant "application buffer". It is check #1. * - to prevent pruning of receive queue because of misprediction * of receiver window. Check #2. * * The scheme does not work when sender sends good segments opening * window and then starts to feed us spaghetti. But it should work * in common situations. Otherwise, we have to rely on queue collapsing. */ /* Slow part of check#2. */ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb, unsigned int skbtruesize) { const struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1; int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1; while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) return 2 * inet_csk(sk)->icsk_ack.rcv_mss; truesize >>= 1; window >>= 1; } return 0; } /* Even if skb appears to have a bad len/truesize ratio, TCP coalescing * can play nice with us, as sk_buff and skb->head might be either * freed or shared with up to MAX_SKB_FRAGS segments. * Only give a boost to drivers using page frag(s) to hold the frame(s), * and if no payload was pulled in skb->head before reaching us. */ static u32 truesize_adjust(bool adjust, const struct sk_buff *skb) { u32 truesize = skb->truesize; if (adjust && !skb_headlen(skb)) { truesize -= SKB_TRUESIZE(skb_end_offset(skb)); /* paranoid check, some drivers might be buggy */ if (unlikely((int)truesize < (int)skb->len)) truesize = skb->truesize; } return truesize; } static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb, bool adjust) { struct tcp_sock *tp = tcp_sk(sk); int room; room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh; if (room <= 0) return; /* Check #1 */ if (!tcp_under_memory_pressure(sk)) { unsigned int truesize = truesize_adjust(adjust, skb); int incr; /* Check #2. Increase window, if skb with such overhead * will fit to rcvbuf in future. */ if (tcp_win_from_space(sk, truesize) <= skb->len) incr = 2 * tp->advmss; else incr = __tcp_grow_window(sk, skb, truesize); if (incr) { incr = max_t(int, incr, 2 * skb->len); tp->rcv_ssthresh += min(room, incr); inet_csk(sk)->icsk_ack.quick |= 1; } } else { /* Under pressure: * Adjust rcv_ssthresh according to reserved mem */ tcp_adjust_rcv_ssthresh(sk); } } /* 3. Try to fixup all. It is made immediately after connection enters * established state. */ static void tcp_init_buffer_space(struct sock *sk) { int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win); struct tcp_sock *tp = tcp_sk(sk); int maxwin; if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) tcp_sndbuf_expand(sk); tcp_mstamp_refresh(tp); tp->rcvq_space.time = tp->tcp_mstamp; tp->rcvq_space.seq = tp->copied_seq; maxwin = tcp_full_space(sk); if (tp->window_clamp >= maxwin) { WRITE_ONCE(tp->window_clamp, maxwin); if (tcp_app_win && maxwin > 4 * tp->advmss) WRITE_ONCE(tp->window_clamp, max(maxwin - (maxwin >> tcp_app_win), 4 * tp->advmss)); } /* Force reservation of one segment. */ if (tcp_app_win && tp->window_clamp > 2 * tp->advmss && tp->window_clamp + tp->advmss > maxwin) WRITE_ONCE(tp->window_clamp, max(2 * tp->advmss, maxwin - tp->advmss)); tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); tp->snd_cwnd_stamp = tcp_jiffies32; tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd, (u32)TCP_INIT_CWND * tp->advmss); } /* 4. Recalculate window clamp after socket hit its memory bounds. */ static void tcp_clamp_window(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); int rmem2; icsk->icsk_ack.quick = 0; rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]); if (sk->sk_rcvbuf < rmem2 && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !tcp_under_memory_pressure(sk) && sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { WRITE_ONCE(sk->sk_rcvbuf, min(atomic_read(&sk->sk_rmem_alloc), rmem2)); } if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); } /* Initialize RCV_MSS value. * RCV_MSS is an our guess about MSS used by the peer. * We haven't any direct information about the MSS. * It's better to underestimate the RCV_MSS rather than overestimate. * Overestimations make us ACKing less frequently than needed. * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss(). */ void tcp_initialize_rcv_mss(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); hint = min(hint, tp->rcv_wnd / 2); hint = min(hint, TCP_MSS_DEFAULT); hint = max(hint, TCP_MIN_MSS); inet_csk(sk)->icsk_ack.rcv_mss = hint; } EXPORT_IPV6_MOD(tcp_initialize_rcv_mss); /* Receiver "autotuning" code. * * The algorithm for RTT estimation w/o timestamps is based on * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL. * <https://public.lanl.gov/radiant/pubs.html#DRS> * * More detail on this code can be found at * <http://staff.psc.edu/jheffner/>, * though this reference is out of date. A new paper * is pending. */ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) { u32 new_sample, old_sample = tp->rcv_rtt_est.rtt_us; long m = sample << 3; if (old_sample == 0 || m < old_sample) { new_sample = m; } else { /* If we sample in larger samples in the non-timestamp * case, we could grossly overestimate the RTT especially * with chatty applications or bulk transfer apps which * are stalled on filesystem I/O. * * Also, since we are only going for a minimum in the * non-timestamp case, we do not smooth things out * else with timestamps disabled convergence takes too * long. */ if (win_dep) return; /* Do not use this sample if receive queue is not empty. */ if (tp->rcv_nxt != tp->copied_seq) return; new_sample = old_sample - (old_sample >> 3) + sample; } tp->rcv_rtt_est.rtt_us = new_sample; } static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp) { u32 delta_us; if (tp->rcv_rtt_est.time == 0) goto new_measure; if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) return; delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time); if (!delta_us) delta_us = 1; tcp_rcv_rtt_update(tp, delta_us, 1); new_measure: tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; tp->rcv_rtt_est.time = tp->tcp_mstamp; } static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp, u32 min_delta) { u32 delta, delta_us; delta = tcp_time_stamp_ts(tp) - tp->rx_opt.rcv_tsecr; if (tp->tcp_usec_ts) return delta; if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { if (!delta) delta = min_delta; delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); return delta_us; } return -1; } static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr) return; tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr; if (TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) { s32 delta = tcp_rtt_tsopt_us(tp, 0); if (delta > 0) tcp_rcv_rtt_update(tp, delta, 0); } } void tcp_rcvbuf_grow(struct sock *sk, u32 newval) { const struct net *net = sock_net(sk); struct tcp_sock *tp = tcp_sk(sk); u32 rcvwin, rcvbuf, cap, oldval; u64 grow; oldval = tp->rcvq_space.space; tp->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return; /* DRS is always one RTT late. */ rcvwin = newval << 1; /* slow start: allow the sender to double its rate. */ grow = (u64)rcvwin * (newval - oldval); do_div(grow, oldval); rcvwin += grow << 1; if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt; cap = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]); rcvbuf = min_t(u32, tcp_space_from_win(sk, rcvwin), cap); if (rcvbuf > sk->sk_rcvbuf) { WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); /* Make the window clamp follow along. */ WRITE_ONCE(tp->window_clamp, tcp_win_from_space(sk, rcvbuf)); } } /* * This function should be called every time data is copied to user space. * It calculates the appropriate TCP receive buffer space. */ void tcp_rcv_space_adjust(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); int time, inq, copied; trace_tcp_rcv_space_adjust(sk); tcp_mstamp_refresh(tp); time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) return; /* Number of bytes copied to user in last RTT */ copied = tp->copied_seq - tp->rcvq_space.seq; /* Number of bytes in receive queue. */ inq = tp->rcv_nxt - tp->copied_seq; copied -= inq; if (copied <= tp->rcvq_space.space) goto new_measure; trace_tcp_rcvbuf_grow(sk, time); tcp_rcvbuf_grow(sk, copied); new_measure: tp->rcvq_space.seq = tp->copied_seq; tp->rcvq_space.time = tp->tcp_mstamp; } static void tcp_save_lrcv_flowlabel(struct sock *sk, const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) struct inet_connection_sock *icsk = inet_csk(sk); if (skb->protocol == htons(ETH_P_IPV6)) icsk->icsk_ack.lrcv_flowlabel = ntohl(ip6_flowlabel(ipv6_hdr(skb))); #endif } /* There is something which you must keep in mind when you analyze the * behavior of the tp->ato delayed ack timeout interval. When a * connection starts up, we want to ack as quickly as possible. The * problem is that "good" TCP's do slow start at the beginning of data * transmission. The means that until we send the first few ACK's the * sender will sit on his end and only queue most of his data, because * he can only send snd_cwnd unacked packets at any given time. For * each ACK we send, he increments snd_cwnd and transmits more of his * queue. -DaveM */ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); u32 now; inet_csk_schedule_ack(sk); tcp_measure_rcv_mss(sk, skb); tcp_rcv_rtt_measure(tp); now = tcp_jiffies32; if (!icsk->icsk_ack.ato) { /* The _first_ data packet received, initialize * delayed ACK engine. */ tcp_incr_quickack(sk, TCP_MAX_QUICKACKS); icsk->icsk_ack.ato = TCP_ATO_MIN; } else { int m = now - icsk->icsk_ack.lrcvtime; if (m <= TCP_ATO_MIN / 2) { /* The fastest case is the first. */ icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2; } else if (m < icsk->icsk_ack.ato) { icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m; if (icsk->icsk_ack.ato > icsk->icsk_rto) icsk->icsk_ack.ato = icsk->icsk_rto; } else if (m > icsk->icsk_rto) { /* Too long gap. Apparently sender failed to * restart window, so that we send ACKs quickly. */ tcp_incr_quickack(sk, TCP_MAX_QUICKACKS); } } icsk->icsk_ack.lrcvtime = now; tcp_save_lrcv_flowlabel(sk, skb); tcp_data_ecn_check(sk, skb); if (skb->len >= 128) tcp_grow_window(sk, skb, true); } /* Called to compute a smoothed rtt estimate. The data fed to this * routine either comes from timestamps, or from segments that were * known _not_ to have been retransmitted [see Karn/Partridge * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 * piece by Van Jacobson. * NOTE: the next three routines used to be one big routine. * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) { struct tcp_sock *tp = tcp_sk(sk); long m = mrtt_us; /* RTT */ u32 srtt = tp->srtt_us; /* The following amusing code comes from Jacobson's * article in SIGCOMM '88. Note that rtt and mdev * are scaled versions of rtt and mean deviation. * This is designed to be as fast as possible * m stands for "measurement". * * On a 1990 paper the rto value is changed to: * RTO = rtt + 4 * mdev * * Funny. This algorithm seems to be very broken. * These formulae increase RTO, when it should be decreased, increase * too slowly, when it should be increased quickly, decrease too quickly * etc. I guess in BSD RTO takes ONE value, so that it is absolutely * does not matter how to _calculate_ it. Seems, it was trap * that VJ failed to avoid. 8) */ if (srtt != 0) { m -= (srtt >> 3); /* m is now error in rtt est */ srtt += m; /* rtt = 7/8 rtt + 1/8 new */ if (m < 0) { m = -m; /* m is now abs(error) */ m -= (tp->mdev_us >> 2); /* similar update on mdev */ /* This is similar to one of Eifel findings. * Eifel blocks mdev updates when rtt decreases. * This solution is a bit different: we use finer gain * for mdev in this case (alpha*beta). * Like Eifel it also prevents growth of rto, * but also it limits too fast rto decreases, * happening in pure Eifel. */ if (m > 0) m >>= 3; } else { m -= (tp->mdev_us >> 2); /* similar update on mdev */ } tp->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ if (tp->mdev_us > tp->mdev_max_us) { tp->mdev_max_us = tp->mdev_us; if (tp->mdev_max_us > tp->rttvar_us) tp->rttvar_us = tp->mdev_max_us; } if (after(tp->snd_una, tp->rtt_seq)) { if (tp->mdev_max_us < tp->rttvar_us) tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2; tp->rtt_seq = tp->snd_nxt; tp->mdev_max_us = tcp_rto_min_us(sk); tcp_bpf_rtt(sk, mrtt_us, srtt); } } else { /* no previous measure. */ srtt = m << 3; /* take the measured time to be rtt */ tp->mdev_us = m << 1; /* make sure rto = 3*rtt */ tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk)); tp->mdev_max_us = tp->rttvar_us; tp->rtt_seq = tp->snd_nxt; tcp_bpf_rtt(sk, mrtt_us, srtt); } tp->srtt_us = max(1U, srtt); } static void tcp_update_pacing_rate(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); u64 rate; /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3); /* current rate is (cwnd * mss) / srtt * In Slow Start [1], set sk_pacing_rate to 200 % the current rate. * In Congestion Avoidance phase, set it to 120 % the current rate. * * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh) * If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching * end of slow start and should slow down. */ if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2) rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio); else rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio); rate *= max(tcp_snd_cwnd(tp), tp->packets_out); if (likely(tp->srtt_us)) do_div(rate, tp->srtt_us); /* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate * without any lock. We want to make sure compiler wont store * intermediate values in this location. */ WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate))); } /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ static void tcp_set_rto(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); /* Old crap is replaced with new one. 8) * * More seriously: * 1. If rtt variance happened to be less 50msec, it is hallucination. * It cannot be less due to utterly erratic ACK generation made * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ * to do with delayed acks, because at cwnd>2 true delack timeout * is invisible. Actually, Linux-2.4 also generates erratic * ACKs in some circumstances. */ inet_csk(sk)->icsk_rto = __tcp_set_rto(tp); /* 2. Fixups made earlier cannot be right. * If we do not estimate RTO correctly without them, * all the algo is pure shit and should be replaced * with correct one. It is exactly, which we pretend to do. */ /* NOTE: clamping at TCP_RTO_MIN is not required, current algo * guarantees that rto is higher. */ tcp_bound_rto(sk); } __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) { __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); if (!cwnd) cwnd = TCP_INIT_CWND; return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } struct tcp_sacktag_state { /* Timestamps for earliest and latest never-retransmitted segment * that was SACKed. RTO needs the earliest RTT to stay conservative, * but congestion control should still get an accurate delay signal. */ u64 first_sackt; u64 last_sackt; u32 reord; u32 sack_delivered; u32 delivered_bytes; int flag; unsigned int mss_now; struct rate_sample *rate; }; /* Take a notice that peer is sending D-SACKs. Skip update of data delivery * and spurious retransmission information if this DSACK is unlikely caused by * sender's action: * - DSACKed sequence range is larger than maximum receiver's window. * - Total no. of DSACKed segments exceed the total no. of retransmitted segs. */ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq, u32 end_seq, struct tcp_sacktag_state *state) { u32 seq_len, dup_segs = 1; if (!before(start_seq, end_seq)) return 0; seq_len = end_seq - start_seq; /* Dubious DSACK: DSACKed range greater than maximum advertised rwnd */ if (seq_len > tp->max_window) return 0; if (seq_len > tp->mss_cache) dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache); else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq) state->flag |= FLAG_DSACK_TLP; tp->dsack_dups += dup_segs; /* Skip the DSACK if dup segs weren't retransmitted by sender */ if (tp->dsack_dups > tp->total_retrans) return 0; tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; /* We increase the RACK ordering window in rounds where we receive * DSACKs that may have been due to reordering causing RACK to trigger * a spurious fast recovery. Thus RACK ignores DSACKs that happen * without having seen reordering, or that match TLP probes (TLP * is timer-driven, not triggered by RACK). */ if (tp->reord_seen && !(state->flag & FLAG_DSACK_TLP)) tp->rack.dsack_seen = 1; state->flag |= FLAG_DSACKING_ACK; /* A spurious retransmission is delivered */ state->sack_delivered += dup_segs; return dup_segs; } /* It's reordering when higher sequence was delivered (i.e. sacked) before * some lower never-retransmitted sequence ("low_seq"). The maximum reordering * distance is approximated in full-mss packet distance ("reordering"). */ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, const int ts) { struct tcp_sock *tp = tcp_sk(sk); const u32 mss = tp->mss_cache; u32 fack, metric; fack = tcp_highest_sack_seq(tp); if (!before(low_seq, fack)) return; metric = fack - low_seq; if ((metric > tp->reordering * mss) && mss) { #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, tp->reordering, 0, tp->sacked_out, tp->undo_marker ? tp->undo_retrans : 0); #endif tp->reordering = min_t(u32, (metric + mss - 1) / mss, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); } /* This exciting event is worth to be remembered. 8) */ tp->reord_seen++; NET_INC_STATS(sock_net(sk), ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER); } /* This must be called before lost_out or retrans_out are updated * on a new loss, because we want to know if all skbs previously * known to be lost have already been retransmitted, indicating * that this newly lost skb is our next skb to retransmit. */ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) { if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) || (tp->retransmit_skb_hint && before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->retransmit_skb_hint)->seq))) tp->retransmit_skb_hint = skb; } /* Sum the number of packets on the wire we have marked as lost, and * notify the congestion control module that the given skb was marked lost. */ static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb) { tp->lost += tcp_skb_pcount(skb); } void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) { __u8 sacked = TCP_SKB_CB(skb)->sacked; struct tcp_sock *tp = tcp_sk(sk); if (sacked & TCPCB_SACKED_ACKED) return; tcp_verify_retransmit_hint(tp, skb); if (sacked & TCPCB_LOST) { if (sacked & TCPCB_SACKED_RETRANS) { /* Account for retransmits that are lost again */ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT, tcp_skb_pcount(skb)); tcp_notify_skb_loss_event(tp, skb); } } else { tp->lost_out += tcp_skb_pcount(skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tcp_notify_skb_loss_event(tp, skb); } } /* This procedure tags the retransmission queue when SACKs arrive. * * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). * Packets in queue with these bits set are counted in variables * sacked_out, retrans_out and lost_out, correspondingly. * * Valid combinations are: * Tag InFlight Description * 0 1 - orig segment is in flight. * S 0 - nothing flies, orig reached receiver. * L 0 - nothing flies, orig lost by net. * R 2 - both orig and retransmit are in flight. * L|R 1 - orig is lost, retransmit is in flight. * S|R 1 - orig reached receiver, retrans is still in flight. * (L|S|R is logically valid, it could occur when L|R is sacked, * but it is equivalent to plain S and code short-circuits it to S. * L|S is logically invalid, it would mean -1 packet in flight 8)) * * These 6 states form finite state machine, controlled by the following events: * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) * 3. Loss detection event of two flavors: * A. Scoreboard estimator decided the packet is lost. * A'. Reno "three dupacks" marks head of queue lost. * B. SACK arrives sacking SND.NXT at the moment, when the * segment was retransmitted. * 4. D-SACK added new rule: D-SACK changes any tag to S. * * It is pleasant to note, that state diagram turns out to be commutative, * so that we are allowed not to be bothered by order of our actions, * when multiple events arrive simultaneously. (see the function below). * * Reordering detection. * -------------------- * Reordering metric is maximal distance, which a packet can be displaced * in packet stream. With SACKs we can estimate it: * * 1. SACK fills old hole and the corresponding segment was not * ever retransmitted -> reordering. Alas, we cannot use it * when segment was retransmitted. * 2. The last flaw is solved with D-SACK. D-SACK arrives * for retransmitted and already SACKed segment -> reordering.. * Both of these heuristics are not used in Loss state, when we cannot * account for retransmits accurately. * * SACK block validation. * ---------------------- * * SACK block range validation checks that the received SACK block fits to * the expected sequence limits, i.e., it is between SND.UNA and SND.NXT. * Note that SND.UNA is not included to the range though being valid because * it means that the receiver is rather inconsistent with itself reporting * SACK reneging when it should advance SND.UNA. Such SACK block this is * perfectly valid, however, in light of RFC2018 which explicitly states * that "SACK block MUST reflect the newest segment. Even if the newest * segment is going to be discarded ...", not that it looks very clever * in case of head skb. Due to potentional receiver driven attacks, we * choose to avoid immediate execution of a walk in write queue due to * reneging and defer head skb's loss recovery to standard loss recovery * procedure that will eventually trigger (nothing forbids us doing this). * * Implements also blockage to start_seq wrap-around. Problem lies in the * fact that though start_seq (s) is before end_seq (i.e., not reversed), * there's no guarantee that it will be before snd_nxt (n). The problem * happens when start_seq resides between end_seq wrap (e_w) and snd_nxt * wrap (s_w): * * <- outs wnd -> <- wrapzone -> * u e n u_w e_w s n_w * | | | | | | | * |<------------+------+----- TCP seqno space --------------+---------->| * ...-- <2^31 ->| |<--------... * ...---- >2^31 ------>| |<--------... * * Current code wouldn't be vulnerable but it's better still to discard such * crazy SACK blocks. Doing this check for start_seq alone closes somewhat * similar case (end_seq after snd_nxt wrap) as earlier reversed check in * snd_nxt wrap -> snd_una region will then become "well defined", i.e., * equal to the ideal case (infinite seqno space without wrap caused issues). * * With D-SACK the lower bound is extended to cover sequence space below * SND.UNA down to undo_marker, which is the last point of interest. Yet * again, D-SACK block must not to go across snd_una (for the same reason as * for the normal SACK blocks, explained above). But there all simplicity * ends, TCP might receive valid D-SACKs below that. As long as they reside * fully below undo_marker they do not affect behavior in anyway and can * therefore be safely ignored. In rare cases (which are more or less * theoretical ones), the D-SACK will nicely cross that boundary due to skb * fragmentation and packet reordering past skb's retransmission. To consider * them correctly, the acceptable range must be extended even more though * the exact amount is rather hard to quantify. However, tp->max_window can * be used as an exaggerated estimate. */ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack, u32 start_seq, u32 end_seq) { /* Too far in future, or reversed (interpretation is ambiguous) */ if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq)) return false; /* Nasty start_seq wrap-around check (see comments above) */ if (!before(start_seq, tp->snd_nxt)) return false; /* In outstanding window? ...This is valid exit for D-SACKs too. * start_seq == snd_una is non-sensical (see comments above) */ if (after(start_seq, tp->snd_una)) return true; if (!is_dsack || !tp->undo_marker) return false; /* ...Then it's D-SACK, and must reside below snd_una completely */ if (after(end_seq, tp->snd_una)) return false; if (!before(start_seq, tp->undo_marker)) return true; /* Too old */ if (!after(end_seq, tp->undo_marker)) return false; /* Undo_marker boundary crossing (overestimates a lot). Known already: * start_seq < undo_marker and end_seq >= undo_marker. */ return !before(start_seq, end_seq - tp->max_window); } static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp, int num_sacks, u32 prior_snd_una, struct tcp_sacktag_state *state) { struct tcp_sock *tp = tcp_sk(sk); u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq); u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq); u32 dup_segs; if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV); } else if (num_sacks > 1) { u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq); u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq); if (after(end_seq_0, end_seq_1) || before(start_seq_0, start_seq_1)) return false; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV); } else { return false; } dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state); if (!dup_segs) { /* Skip dubious DSACK */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKIGNOREDDUBIOUS); return false; } NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs); /* D-SACK for already forgotten data... Do dumb counting. */ if (tp->undo_marker && tp->undo_retrans > 0 && !after(end_seq_0, prior_snd_una) && after(end_seq_0, tp->undo_marker)) tp->undo_retrans = max_t(int, 0, tp->undo_retrans - dup_segs); return true; } /* Check if skb is fully within the SACK block. In presence of GSO skbs, * the incoming SACK may not exactly match but we can find smaller MSS * aligned portion of it that matches. Therefore we might need to fragment * which may fail and creates some hassle (caller must handle error case * returns). * * FIXME: this could be merged to shift decision code */ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, u32 start_seq, u32 end_seq) { int err; bool in_sack; unsigned int pkt_len; unsigned int mss; in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && !before(end_seq, TCP_SKB_CB(skb)->end_seq); if (tcp_skb_pcount(skb) > 1 && !in_sack && after(TCP_SKB_CB(skb)->end_seq, start_seq)) { mss = tcp_skb_mss(skb); in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); if (!in_sack) { pkt_len = start_seq - TCP_SKB_CB(skb)->seq; if (pkt_len < mss) pkt_len = mss; } else { pkt_len = end_seq - TCP_SKB_CB(skb)->seq; if (pkt_len < mss) return -EINVAL; } /* Round if necessary so that SACKs cover only full MSSes * and/or the remaining small portion (if present) */ if (pkt_len > mss) { unsigned int new_len = (pkt_len / mss) * mss; if (!in_sack && new_len < pkt_len) new_len += mss; pkt_len = new_len; } if (pkt_len >= skb->len && !in_sack) return 0; err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, pkt_len, mss, GFP_ATOMIC); if (err < 0) return err; } return in_sack; } /* Mark the given newly-SACKed range as such, adjusting counters and hints. */ static u8 tcp_sacktag_one(struct sock *sk, struct tcp_sacktag_state *state, u8 sacked, u32 start_seq, u32 end_seq, int dup_sack, int pcount, u32 plen, u64 xmit_time) { struct tcp_sock *tp = tcp_sk(sk); /* Account D-SACK for retransmitted packet. */ if (dup_sack && (sacked & TCPCB_RETRANS)) { if (tp->undo_marker && tp->undo_retrans > 0 && after(end_seq, tp->undo_marker)) tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount); if ((sacked & TCPCB_SACKED_ACKED) && before(start_seq, state->reord)) state->reord = start_seq; } /* Nothing to do; acked frame is about to be dropped (was ACKed). */ if (!after(end_seq, tp->snd_una)) return sacked; if (!(sacked & TCPCB_SACKED_ACKED)) { tcp_rack_advance(tp, sacked, end_seq, xmit_time); if (sacked & TCPCB_SACKED_RETRANS) { /* If the segment is not tagged as lost, * we do not clear RETRANS, believing * that retransmission is still in flight. */ if (sacked & TCPCB_LOST) { sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); tp->lost_out -= pcount; tp->retrans_out -= pcount; } } else { if (!(sacked & TCPCB_RETRANS)) { /* New sack for not retransmitted frame, * which was in hole. It is reordering. */ if (before(start_seq, tcp_highest_sack_seq(tp)) && before(start_seq, state->reord)) state->reord = start_seq; if (!after(end_seq, tp->high_seq)) state->flag |= FLAG_ORIG_SACK_ACKED; if (state->first_sackt == 0) state->first_sackt = xmit_time; state->last_sackt = xmit_time; } if (sacked & TCPCB_LOST) { sacked &= ~TCPCB_LOST; tp->lost_out -= pcount; } } sacked |= TCPCB_SACKED_ACKED; state->flag |= FLAG_DATA_SACKED; tp->sacked_out += pcount; /* Out-of-order packets delivered */ state->sack_delivered += pcount; state->delivered_bytes += plen; } /* D-SACK. We can detect redundant retransmission in S|R and plain R * frames and clear it. undo_retrans is decreased above, L|R frames * are accounted above as well. */ if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) { sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= pcount; } return sacked; } /* Shift newly-SACKed bytes from this skb to the immediately previous * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. */ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, struct sk_buff *skb, struct tcp_sacktag_state *state, unsigned int pcount, int shifted, int mss, bool dup_sack) { struct tcp_sock *tp = tcp_sk(sk); u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */ u32 end_seq = start_seq + shifted; /* end of newly-SACKed */ BUG_ON(!pcount); /* Adjust counters and hints for the newly sacked sequence * range but discard the return value since prev is already * marked. We must tag the range first because the seq * advancement below implicitly advances * tcp_highest_sack_seq() when skb is highest_sack. */ tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, start_seq, end_seq, dup_sack, pcount, skb->len, tcp_skb_timestamp_us(skb)); tcp_rate_skb_delivered(sk, skb, state->rate); TCP_SKB_CB(prev)->end_seq += shifted; TCP_SKB_CB(skb)->seq += shifted; tcp_skb_pcount_add(prev, pcount); WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); tcp_skb_pcount_add(skb, -pcount); /* When we're adding to gso_segs == 1, gso_size will be zero, * in theory this shouldn't be necessary but as long as DSACK * code can come after this skb later on it's better to keep * setting gso_size to something. */ if (!TCP_SKB_CB(prev)->tcp_gso_size) TCP_SKB_CB(prev)->tcp_gso_size = mss; /* CHECKME: To clear or not to clear? Mimics normal skb currently */ if (tcp_skb_pcount(skb) <= 1) TCP_SKB_CB(skb)->tcp_gso_size = 0; /* Difference in this won't matter, both ACKed by the same cumul. ACK */ TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); if (skb->len > 0) { BUG_ON(!tcp_skb_pcount(skb)); NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED); return false; } /* Whole SKB was eaten :-) */ if (skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = prev; TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) TCP_SKB_CB(prev)->end_seq++; if (skb == tcp_highest_sack(sk)) tcp_advance_highest_sack(sk, skb); tcp_skb_collapse_tstamp(prev, skb); if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp)) TCP_SKB_CB(prev)->tx.delivered_mstamp = 0; tcp_rtx_queue_unlink_and_free(skb, sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED); return true; } /* I wish gso_size would have a bit more sane initialization than * something-or-zero which complicates things */ static int tcp_skb_seglen(const struct sk_buff *skb) { return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb); } /* Shifting pages past head area doesn't work */ static int skb_can_shift(const struct sk_buff *skb) { return !skb_headlen(skb) && skb_is_nonlinear(skb); } int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, int shiftlen) { /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE) * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need * to make sure not storing more than 65535 * 8 bytes per skb, * even if current MSS is bigger. */ if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE)) return 0; if (unlikely(tcp_skb_pcount(to) + pcount > 65535)) return 0; return skb_shift(to, from, shiftlen); } /* Try collapsing SACK blocks spanning across multiple skbs to a single * skb. */ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, struct tcp_sacktag_state *state, u32 start_seq, u32 end_seq, bool dup_sack) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *prev; int mss; int pcount = 0; int len; int in_sack; /* Normally R but no L won't result in plain S */ if (!dup_sack && (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS) goto fallback; if (!skb_can_shift(skb)) goto fallback; /* This frame is about to be dropped (was ACKed). */ if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) goto fallback; /* Can only happen with delayed DSACK + discard craziness */ prev = skb_rb_prev(skb); if (!prev) goto fallback; if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) goto fallback; if (!tcp_skb_can_collapse(prev, skb)) goto fallback; in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && !before(end_seq, TCP_SKB_CB(skb)->end_seq); if (in_sack) { len = skb->len; pcount = tcp_skb_pcount(skb); mss = tcp_skb_seglen(skb); /* TODO: Fix DSACKs to not fragment already SACKed and we can * drop this restriction as unnecessary */ if (mss != tcp_skb_seglen(prev)) goto fallback; } else { if (!after(TCP_SKB_CB(skb)->end_seq, start_seq)) goto noop; /* CHECKME: This is non-MSS split case only?, this will * cause skipped skbs due to advancing loop btw, original * has that feature too */ if (tcp_skb_pcount(skb) <= 1) goto noop; in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); if (!in_sack) { /* TODO: head merge to next could be attempted here * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)), * though it might not be worth of the additional hassle * * ...we can probably just fallback to what was done * previously. We could try merging non-SACKed ones * as well but it probably isn't going to buy off * because later SACKs might again split them, and * it would make skb timestamp tracking considerably * harder problem. */ goto fallback; } len = end_seq - TCP_SKB_CB(skb)->seq; BUG_ON(len < 0); BUG_ON(len > skb->len); /* MSS boundaries should be honoured or else pcount will * severely break even though it makes things bit trickier. * Optimize common case to avoid most of the divides */ mss = tcp_skb_mss(skb); /* TODO: Fix DSACKs to not fragment already SACKed and we can * drop this restriction as unnecessary */ if (mss != tcp_skb_seglen(prev)) goto fallback; if (len == mss) { pcount = 1; } else if (len < mss) { goto noop; } else { pcount = len / mss; len = pcount * mss; } } /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */ if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) goto fallback; if (!tcp_skb_shift(prev, skb, pcount, len)) goto fallback; if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack)) goto out; /* Hole filled allows collapsing with the next as well, this is very * useful when hole on every nth skb pattern happens */ skb = skb_rb_next(prev); if (!skb) goto out; if (!skb_can_shift(skb) || ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) || (mss != tcp_skb_seglen(skb))) goto out; if (!tcp_skb_can_collapse(prev, skb)) goto out; len = skb->len; pcount = tcp_skb_pcount(skb); if (tcp_skb_shift(prev, skb, pcount, len)) tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, 0); out: return prev; noop: return skb; fallback: NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK); return NULL; } static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, struct tcp_sack_block *next_dup, struct tcp_sacktag_state *state, u32 start_seq, u32 end_seq, bool dup_sack_in) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *tmp; skb_rbtree_walk_from(skb) { int in_sack = 0; bool dup_sack = dup_sack_in; /* queue is in-order => we can short-circuit the walk early */ if (!before(TCP_SKB_CB(skb)->seq, end_seq)) break; if (next_dup && before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) { in_sack = tcp_match_skb_to_sack(sk, skb, next_dup->start_seq, next_dup->end_seq); if (in_sack > 0) dup_sack = true; } /* skb reference here is a bit tricky to get right, since * shifting can eat and free both this skb and the next, * so not even _safe variant of the loop is enough. */ if (in_sack <= 0) { tmp = tcp_shift_skb_data(sk, skb, state, start_seq, end_seq, dup_sack); if (tmp) { if (tmp != skb) { skb = tmp; continue; } in_sack = 0; } else { in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq); } } if (unlikely(in_sack < 0)) break; if (in_sack) { TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, dup_sack, tcp_skb_pcount(skb), skb->len, tcp_skb_timestamp_us(skb)); tcp_rate_skb_delivered(sk, skb, state->rate); if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) list_del_init(&skb->tcp_tsorted_anchor); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) tcp_advance_highest_sack(sk, skb); } } return skb; } static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq) { struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node; struct sk_buff *skb; while (*p) { parent = *p; skb = rb_to_skb(parent); if (before(seq, TCP_SKB_CB(skb)->seq)) { p = &parent->rb_left; continue; } if (!before(seq, TCP_SKB_CB(skb)->end_seq)) { p = &parent->rb_right; continue; } return skb; } return NULL; } static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, u32 skip_to_seq) { if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq)) return skb; return tcp_sacktag_bsearch(sk, skip_to_seq); } static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, struct sock *sk, struct tcp_sack_block *next_dup, struct tcp_sacktag_state *state, u32 skip_to_seq) { if (!next_dup) return skb; if (before(next_dup->start_seq, skip_to_seq)) { skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq); skb = tcp_sacktag_walk(skb, sk, NULL, state, next_dup->start_seq, next_dup->end_seq, 1); } return skb; } static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache) { return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache); } static int tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, u32 prior_snd_una, struct tcp_sacktag_state *state) { struct tcp_sock *tp = tcp_sk(sk); const unsigned char *ptr = (skb_transport_header(ack_skb) + TCP_SKB_CB(ack_skb)->sacked); struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); struct tcp_sack_block sp[TCP_NUM_SACKS]; struct tcp_sack_block *cache; struct sk_buff *skb; int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); int used_sacks; bool found_dup_sack = false; int i, j; int first_sack_index; state->flag = 0; state->reord = tp->snd_nxt; if (!tp->sacked_out) tcp_highest_sack_reset(sk); found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una, state); /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can * contain valid SACK info. */ if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window)) return 0; if (!tp->packets_out) goto out; used_sacks = 0; first_sack_index = 0; for (i = 0; i < num_sacks; i++) { bool dup_sack = !i && found_dup_sack; sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq); sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq); if (!tcp_is_sackblock_valid(tp, dup_sack, sp[used_sacks].start_seq, sp[used_sacks].end_seq)) { int mib_idx; if (dup_sack) { if (!tp->undo_marker) mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO; else mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD; } else { /* Don't count olds caused by ACK reordering */ if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) && !after(sp[used_sacks].end_seq, tp->snd_una)) continue; mib_idx = LINUX_MIB_TCPSACKDISCARD; } NET_INC_STATS(sock_net(sk), mib_idx); if (i == 0) first_sack_index = -1; continue; } /* Ignore very old stuff early */ if (!after(sp[used_sacks].end_seq, prior_snd_una)) { if (i == 0) first_sack_index = -1; continue; } used_sacks++; } /* order SACK blocks to allow in order walk of the retrans queue */ for (i = used_sacks - 1; i > 0; i--) { for (j = 0; j < i; j++) { if (after(sp[j].start_seq, sp[j + 1].start_seq)) { swap(sp[j], sp[j + 1]); /* Track where the first SACK block goes to */ if (j == first_sack_index) first_sack_index = j + 1; } } } state->mss_now = tcp_current_mss(sk); skb = NULL; i = 0; if (!tp->sacked_out) { /* It's already past, so skip checking against it */ cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache); } else { cache = tp->recv_sack_cache; /* Skip empty blocks in at head of the cache */ while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq && !cache->end_seq) cache++; } while (i < used_sacks) { u32 start_seq = sp[i].start_seq; u32 end_seq = sp[i].end_seq; bool dup_sack = (found_dup_sack && (i == first_sack_index)); struct tcp_sack_block *next_dup = NULL; if (found_dup_sack && ((i + 1) == first_sack_index)) next_dup = &sp[i + 1]; /* Skip too early cached blocks */ while (tcp_sack_cache_ok(tp, cache) && !before(start_seq, cache->end_seq)) cache++; /* Can skip some work by looking recv_sack_cache? */ if (tcp_sack_cache_ok(tp, cache) && !dup_sack && after(end_seq, cache->start_seq)) { /* Head todo? */ if (before(start_seq, cache->start_seq)) { skb = tcp_sacktag_skip(skb, sk, start_seq); skb = tcp_sacktag_walk(skb, sk, next_dup, state, start_seq, cache->start_seq, dup_sack); } /* Rest of the block already fully processed? */ if (!after(end_seq, cache->end_seq)) goto advance_sp; skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, state, cache->end_seq); /* ...tail remains todo... */ if (tcp_highest_sack_seq(tp) == cache->end_seq) { /* ...but better entrypoint exists! */ skb = tcp_highest_sack(sk); if (!skb) break; cache++; goto walk; } skb = tcp_sacktag_skip(skb, sk, cache->end_seq); /* Check overlap against next cached too (past this one already) */ cache++; continue; } if (!before(start_seq, tcp_highest_sack_seq(tp))) { skb = tcp_highest_sack(sk); if (!skb) break; } skb = tcp_sacktag_skip(skb, sk, start_seq); walk: skb = tcp_sacktag_walk(skb, sk, next_dup, state, start_seq, end_seq, dup_sack); advance_sp: i++; } /* Clear the head of the cache sack blocks so we can skip it next time */ for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) { tp->recv_sack_cache[i].start_seq = 0; tp->recv_sack_cache[i].end_seq = 0; } for (j = 0; j < used_sacks; j++) tp->recv_sack_cache[i++] = sp[j]; if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker) tcp_check_sack_reordering(sk, state->reord, 0); tcp_verify_left_out(tp); out: #if FASTRETRANS_DEBUG > 0 WARN_ON((int)tp->sacked_out < 0); WARN_ON((int)tp->lost_out < 0); WARN_ON((int)tp->retrans_out < 0); WARN_ON((int)tcp_packets_in_flight(tp) < 0); #endif return state->flag; } /* Limits sacked_out so that sum with lost_out isn't ever larger than * packets_out. Returns false if sacked_out adjustement wasn't necessary. */ static bool tcp_limit_reno_sacked(struct tcp_sock *tp) { u32 holes; holes = max(tp->lost_out, 1U); holes = min(holes, tp->packets_out); if ((tp->sacked_out + holes) > tp->packets_out) { tp->sacked_out = tp->packets_out - holes; return true; } return false; } /* If we receive more dupacks than we expected counting segments * in assumption of absent reordering, interpret this as reordering. * The only another reason could be bug in receiver TCP. */ static void tcp_check_reno_reordering(struct sock *sk, const int addend) { struct tcp_sock *tp = tcp_sk(sk); if (!tcp_limit_reno_sacked(tp)) return; tp->reordering = min_t(u32, tp->packets_out + addend, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); tp->reord_seen++; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } /* Emulate SACKs for SACKless connection: account for a new dupack. */ static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack) { if (num_dupack) { struct tcp_sock *tp = tcp_sk(sk); u32 prior_sacked = tp->sacked_out; s32 delivered; tp->sacked_out += num_dupack; tcp_check_reno_reordering(sk, 0); delivered = tp->sacked_out - prior_sacked; if (delivered > 0) tcp_count_delivered(tp, delivered, ece_ack); tcp_verify_left_out(tp); } } /* Account for ACK, ACKing some data in Reno Recovery phase. */ static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack) { struct tcp_sock *tp = tcp_sk(sk); if (acked > 0) { /* One ACK acked hole. The rest eat duplicate ACKs. */ tcp_count_delivered(tp, max_t(int, acked - tp->sacked_out, 1), ece_ack); if (acked - 1 >= tp->sacked_out) tp->sacked_out = 0; else tp->sacked_out -= acked - 1; } tcp_check_reno_reordering(sk, acked); tcp_verify_left_out(tp); } static inline void tcp_reset_reno_sack(struct tcp_sock *tp) { tp->sacked_out = 0; } void tcp_clear_retrans(struct tcp_sock *tp) { tp->retrans_out = 0; tp->lost_out = 0; tp->undo_marker = 0; tp->undo_retrans = -1; tp->sacked_out = 0; tp->rto_stamp = 0; tp->total_rto = 0; tp->total_rto_recoveries = 0; tp->total_rto_time = 0; } static inline void tcp_init_undo(struct tcp_sock *tp) { tp->undo_marker = tp->snd_una; /* Retransmission still in flight may cause DSACKs later. */ /* First, account for regular retransmits in flight: */ tp->undo_retrans = tp->retrans_out; /* Next, account for TLP retransmits in flight: */ if (tp->tlp_high_seq && tp->tlp_retrans) tp->undo_retrans++; /* Finally, avoid 0, because undo_retrans==0 means "can undo now": */ if (!tp->undo_retrans) tp->undo_retrans = -1; } /* If we detect SACK reneging, forget all SACK information * and reset tags completely, otherwise preserve SACKs. If receiver * dropped its ofo queue, we will know this due to reneging detection. */ static void tcp_timeout_mark_lost(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb, *head; bool is_reneg; /* is receiver reneging on SACKs? */ head = tcp_rtx_queue_head(sk); is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED); if (is_reneg) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; /* Mark SACK reneging until we recover from this loss event. */ tp->is_sack_reneg = 1; } else if (tcp_is_reno(tp)) { tcp_reset_reno_sack(tp); } skb = head; skb_rbtree_walk_from(skb) { if (is_reneg) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; else if (skb != head && tcp_rack_skb_timeout(tp, skb, 0) > 0) continue; /* Don't mark recently sent ones lost yet */ tcp_mark_skb_lost(sk, skb); } tcp_verify_left_out(tp); tcp_clear_all_retrans_hints(tp); } /* Enter Loss state. */ void tcp_enter_loss(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; u8 reordering; tcp_timeout_mark_lost(sk); /* Reduce ssthresh if it has not yet been made inside this window. */ if (icsk->icsk_ca_state <= TCP_CA_Disorder || !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->prior_cwnd = tcp_snd_cwnd(tp); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); tcp_init_undo(tp); } tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + 1); tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_jiffies32; /* Timeout in disordered state after receiving substantial DUPACKs * suggests that the degree of reordering is over-estimated. */ reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering); if (icsk->icsk_ca_state <= TCP_CA_Disorder && tp->sacked_out >= reordering) tp->reordering = min_t(unsigned int, tp->reordering, reordering); tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; tp->tlp_high_seq = 0; tcp_ecn_queue_cwr(tp); /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous * loss recovery is underway except recurring timeout(s) on * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing */ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) && (new_recovery || icsk->icsk_retransmits) && !inet_csk(sk)->icsk_mtup.probe_size; } /* If ACK arrived pointing to a remembered SACK, it means that our * remembered SACKs do not reflect real state of receiver i.e. * receiver _host_ is heavily congested (or buggy). * * To avoid big spurious retransmission bursts due to transient SACK * scoreboard oddities that look like reneging, we give the receiver a * little time (max(RTT/2, 10ms)) to send us some more ACKs that will * restore sanity to the SACK scoreboard. If the apparent reneging * persists until this RTO then we'll clear the SACK scoreboard. */ static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag) { if (*ack_flag & FLAG_SACK_RENEGING && *ack_flag & FLAG_SND_UNA_ADVANCED) { struct tcp_sock *tp = tcp_sk(sk); unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4), msecs_to_jiffies(10)); tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, false); *ack_flag &= ~FLAG_SET_XMIT_TIMER; return true; } return false; } /* Linux NewReno/SACK/ECN state machine. * -------------------------------------- * * "Open" Normal state, no dubious events, fast path. * "Disorder" In all the respects it is "Open", * but requires a bit more attention. It is entered when * we see some SACKs or dupacks. It is split of "Open" * mainly to move some processing from fast path to slow one. * "CWR" CWND was reduced due to some Congestion Notification event. * It can be ECN, ICMP source quench, local device congestion. * "Recovery" CWND was reduced, we are fast-retransmitting. * "Loss" CWND was reduced due to RTO timeout or SACK reneging. * * tcp_fastretrans_alert() is entered: * - each incoming ACK, if state is not "Open" * - when arrived ACK is unusual, namely: * * SACK * * Duplicate ACK. * * ECN ECE. * * Counting packets in flight is pretty simple. * * in_flight = packets_out - left_out + retrans_out * * packets_out is SND.NXT-SND.UNA counted in packets. * * retrans_out is number of retransmitted segments. * * left_out is number of segments left network, but not ACKed yet. * * left_out = sacked_out + lost_out * * sacked_out: Packets, which arrived to receiver out of order * and hence not ACKed. With SACKs this number is simply * amount of SACKed data. Even without SACKs * it is easy to give pretty reliable estimate of this number, * counting duplicate ACKs. * * lost_out: Packets lost by network. TCP has no explicit * "loss notification" feedback from network (for now). * It means that this number can be only _guessed_. * Actually, it is the heuristics to predict lossage that * distinguishes different algorithms. * * F.e. after RTO, when all the queue is considered as lost, * lost_out = packets_out and in_flight = retrans_out. * * Essentially, we have now a few algorithms detecting * lost packets. * * If the receiver supports SACK: * * RACK (RFC8985): RACK is a newer loss detection algorithm * (2017-) that checks timing instead of counting DUPACKs. * Essentially a packet is considered lost if it's not S/ACKed * after RTT + reordering_window, where both metrics are * dynamically measured and adjusted. This is implemented in * tcp_rack_mark_lost. * * If the receiver does not support SACK: * * NewReno (RFC6582): in Recovery we assume that one segment * is lost (classic Reno). While we are in Recovery and * a partial ACK arrives, we assume that one more packet * is lost (NewReno). This heuristics are the same in NewReno * and SACK. * * The really tricky (and requiring careful tuning) part of the algorithm * is hidden in the RACK code in tcp_recovery.c and tcp_xmit_retransmit_queue(). * The first determines the moment _when_ we should reduce CWND and, * hence, slow down forward transmission. In fact, it determines the moment * when we decide that hole is caused by loss, rather than by a reorder. * * tcp_xmit_retransmit_queue() decides, _what_ we should retransmit to fill * holes, caused by lost packets. * * And the most logically complicated part of algorithm is undo * heuristics. We detect false retransmits due to both too early * fast retransmit (reordering) and underestimated RTO, analyzing * timestamps and D-SACKs. When we detect that some segments were * retransmitted by mistake and CWND reduction was wrong, we undo * window reduction and abort recovery phase. This logic is hidden * inside several functions named tcp_try_undo_<something>. */ /* This function decides, when we should leave Disordered state * and enter Recovery phase, reducing congestion window. * * Main question: may we further continue forward transmission * with the same cwnd? */ static bool tcp_time_to_recover(const struct tcp_sock *tp) { /* Has loss detection marked at least one packet lost? */ return tp->lost_out != 0; } static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when) { return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && before(tp->rx_opt.rcv_tsecr, when); } /* skb is spurious retransmitted if the returned timestamp echo * reply is prior to the skb transmission time */ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp, const struct sk_buff *skb) { return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) && tcp_tsopt_ecr_before(tp, tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb)); } /* Nothing was retransmitted or returned timestamp is less * than timestamp of the first retransmission. */ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) { const struct sock *sk = (const struct sock *)tp; /* Received an echoed timestamp before the first retransmission? */ if (tp->retrans_stamp) return tcp_tsopt_ecr_before(tp, tp->retrans_stamp); /* We set tp->retrans_stamp upon the first retransmission of a loss * recovery episode, so normally if tp->retrans_stamp is 0 then no * retransmission has happened yet (likely due to TSQ, which can cause * fast retransmits to be delayed). So if snd_una advanced while * (tp->retrans_stamp is 0 then apparently a packet was merely delayed, * not lost. But there are exceptions where we retransmit but then * clear tp->retrans_stamp, so we check for those exceptions. */ /* (1) For non-SACK connections, tcp_is_non_sack_preventing_reopen() * clears tp->retrans_stamp when snd_una == high_seq. */ if (!tcp_is_sack(tp) && !before(tp->snd_una, tp->high_seq)) return false; /* (2) In TCP_SYN_SENT tcp_clean_rtx_queue() clears tp->retrans_stamp * when setting FLAG_SYN_ACKED is set, even if the SYN was * retransmitted. */ if (sk->sk_state == TCP_SYN_SENT) return false; return true; /* tp->retrans_stamp is zero; no retransmit yet */ } /* Undo procedures. */ /* We can clear retrans_stamp when there are no retransmissions in the * window. It would seem that it is trivially available for us in * tp->retrans_out, however, that kind of assumptions doesn't consider * what will happen if errors occur when sending retransmission for the * second time. ...It could the that such segment has only * TCPCB_EVER_RETRANS set at the present time. It seems that checking * the head skb is enough except for some reneging corner cases that * are not worth the effort. * * Main reason for all this complexity is the fact that connection dying * time now depends on the validity of the retrans_stamp, in particular, * that successive retransmissions of a segment must not advance * retrans_stamp under any conditions. */ static bool tcp_any_retrans_done(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; if (tp->retrans_out) return true; skb = tcp_rtx_queue_head(sk); if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) return true; return false; } /* If loss recovery is finished and there are no retransmits out in the * network, then we clear retrans_stamp so that upon the next loss recovery * retransmits_timed_out() and timestamp-undo are using the correct value. */ static void tcp_retrans_stamp_cleanup(struct sock *sk) { if (!tcp_any_retrans_done(sk)) tcp_sk(sk)->retrans_stamp = 0; } static void DBGUNDO(struct sock *sk, const char *msg) { #if FASTRETRANS_DEBUG > 1 struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); if (sk->sk_family == AF_INET) { pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", msg, &inet->inet_daddr, ntohs(inet->inet_dport), tcp_snd_cwnd(tp), tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, &sk->sk_v6_daddr, ntohs(inet->inet_dport), tcp_snd_cwnd(tp), tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } #endif #endif } static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) { struct tcp_sock *tp = tcp_sk(sk); if (unmark_loss) { struct sk_buff *skb; skb_rbtree_walk(skb, &sk->tcp_rtx_queue) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; } tp->lost_out = 0; tcp_clear_all_retrans_hints(tp); } if (tp->prior_ssthresh) { const struct inet_connection_sock *icsk = inet_csk(sk); tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk)); if (tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; tcp_ecn_withdraw_cwr(tp); } } tp->snd_cwnd_stamp = tcp_jiffies32; tp->undo_marker = 0; tp->rack.advanced = 1; /* Force RACK to re-exam losses */ } static inline bool tcp_may_undo(const struct tcp_sock *tp) { return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); } static bool tcp_is_non_sack_preventing_reopen(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq * is ACKed. For Reno it is MUST to prevent false * fast retransmits (RFC2582). SACK TCP is safe. */ if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; return true; } return false; } /* People celebrate: "We love our President!" */ static bool tcp_try_undo_recovery(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); if (tcp_may_undo(tp)) { int mib_idx; /* Happy end! We did not retransmit anything * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); tcp_undo_cwnd_reduction(sk, false); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) mib_idx = LINUX_MIB_TCPLOSSUNDO; else mib_idx = LINUX_MIB_TCPFULLUNDO; NET_INC_STATS(sock_net(sk), mib_idx); } else if (tp->rack.reo_wnd_persist) { tp->rack.reo_wnd_persist--; } if (tcp_is_non_sack_preventing_reopen(sk)) return true; tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; return false; } /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ static bool tcp_try_undo_dsack(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); if (tp->undo_marker && !tp->undo_retrans) { tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH, tp->rack.reo_wnd_persist + 1); DBGUNDO(sk, "D-SACK"); tcp_undo_cwnd_reduction(sk, false); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); return true; } return false; } /* Undo during loss recovery after partial ACK or using F-RTO. */ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { struct tcp_sock *tp = tcp_sk(sk); if (frto_undo || tcp_may_undo(tp)) { tcp_undo_cwnd_reduction(sk, true); DBGUNDO(sk, "partial loss"); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); if (frto_undo) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); WRITE_ONCE(inet_csk(sk)->icsk_retransmits, 0); if (tcp_is_non_sack_preventing_reopen(sk)) return true; if (frto_undo || tcp_is_sack(tp)) { tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; } return true; } return false; } /* The cwnd reduction in CWR and Recovery uses the PRR algorithm in RFC 6937. * It computes the number of packets to send (sndcnt) based on packets newly * delivered: * 1) If the packets in flight is larger than ssthresh, PRR spreads the * cwnd reductions across a full RTT. * 2) Otherwise PRR uses packet conservation to send as much as delivered. * But when SND_UNA is acked without further losses, * slow starts cwnd up to ssthresh to speed up the recovery. */ static void tcp_init_cwnd_reduction(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); tp->high_seq = tp->snd_nxt; tp->tlp_high_seq = 0; tp->snd_cwnd_cnt = 0; tp->prior_cwnd = tcp_snd_cwnd(tp); tp->prr_delivered = 0; tp->prr_out = 0; tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); tcp_ecn_queue_cwr(tp); } void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd)) return; trace_tcp_cwnd_reduction_tp(sk, newly_acked_sacked, newly_lost, flag); tp->prr_delivered += newly_acked_sacked; if (delta < 0) { u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + tp->prior_cwnd - 1; sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; } else { sndcnt = max_t(int, tp->prr_delivered - tp->prr_out, newly_acked_sacked); if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost) sndcnt++; sndcnt = min(delta, sndcnt); } /* Force a fast retransmit upon entering fast recovery */ sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1)); tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + sndcnt); } static inline void tcp_end_cwnd_reduction(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); if (inet_csk(sk)->icsk_ca_ops->cong_control) return; /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { tcp_snd_cwnd_set(tp, tp->snd_ssthresh);