Total coverage: 225377 (13%) of 1867041
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Access to user system call parameters and results
 *
 * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
 *
 * See asm-generic/syscall.h for descriptions of what we must do here.
 */

#ifndef _ASM_X86_SYSCALL_H
#define _ASM_X86_SYSCALL_H

#include <uapi/linux/audit.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <asm/thread_info.h>	/* for TS_COMPAT */
#include <asm/unistd.h>

/* This is used purely for kernel/trace/trace_syscalls.c */
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
extern const sys_call_ptr_t sys_call_table[];

/*
 * These may not exist, but still put the prototypes in so we
 * can use IS_ENABLED().
 */
extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
extern long x64_sys_call(const struct pt_regs *, unsigned int nr);

/*
 * Only the low 32 bits of orig_ax are meaningful, so we return int.
 * This importantly ignores the high bits on 64-bit, so comparisons
 * sign-extend the low 32 bits.
 */
static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
	return regs->orig_ax;
}

static inline void syscall_set_nr(struct task_struct *task,
				  struct pt_regs *regs, int nr)
{
	regs->orig_ax = nr;
}

static inline void syscall_rollback(struct task_struct *task,
				    struct pt_regs *regs)
{
	regs->ax = regs->orig_ax;
}

static inline long syscall_get_error(struct task_struct *task,
				     struct pt_regs *regs)
{
	unsigned long error = regs->ax;

#ifdef CONFIG_IA32_EMULATION
	/*
	 * TS_COMPAT is set for 32-bit syscall entries and then
	 * remains set until we return to user mode.
	 */
	if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
		/*
		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
		 * and will match correctly in comparisons.
		 */
		error = (long) (int) error;
#endif

	return IS_ERR_VALUE(error) ?
error : 0; } static inline long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { return regs->ax; } static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { regs->ax = (long) error ?: val; } #ifdef CONFIG_X86_32 static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) { args[0] = regs->bx; args[1] = regs->cx; args[2] = regs->dx; args[3] = regs->si; args[4] = regs->di; args[5] = regs->bp; } static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, const unsigned long *args) { regs->bx = args[0]; regs->cx = args[1]; regs->dx = args[2]; regs->si = args[3]; regs->di = args[4]; regs->bp = args[5]; } static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_I386; } #else /* CONFIG_X86_64 */ static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION if (task->thread_info.status & TS_COMPAT) { *args++ = regs->bx; *args++ = regs->cx; *args++ = regs->dx; *args++ = regs->si; *args++ = regs->di; *args = regs->bp; } else # endif { *args++ = regs->di; *args++ = regs->si; *args++ = regs->dx; *args++ = regs->r10; *args++ = regs->r8; *args = regs->r9; } } static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION if (task->thread_info.status & TS_COMPAT) { regs->bx = *args++; regs->cx = *args++; regs->dx = *args++; regs->si = *args++; regs->di = *args++; regs->bp = *args; } else # endif { regs->di = *args++; regs->si = *args++; regs->dx = *args++; regs->r10 = *args++; regs->r8 = *args++; regs->r9 = *args; } } static inline int syscall_get_arch(struct task_struct *task) { /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ return (IS_ENABLED(CONFIG_IA32_EMULATION) && task->thread_info.status & TS_COMPAT) ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } bool do_syscall_64(struct pt_regs *regs, int nr); void do_int80_emulation(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ void do_int80_syscall_32(struct pt_regs *regs); bool do_fast_syscall_32(struct pt_regs *regs); bool do_SYSENTER_32(struct pt_regs *regs); #endif /* _ASM_X86_SYSCALL_H */
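The header above exists so that generic code (ptrace, seccomp, audit, tracing) can read a syscall's number and arguments without knowing the x86 register ABI. Below is a minimal, hypothetical sketch of a caller using these accessors; trace_openat_entry is an invented name used only for illustration and is not part of this header.

/* Hypothetical illustration only: a tracing-style hook that reads the
 * syscall number and arguments through the accessors defined above.
 */
#include <linux/sched.h>
#include <linux/printk.h>
#include <asm/syscall.h>

static void trace_openat_entry(struct task_struct *task, struct pt_regs *regs)
{
	unsigned long args[6];
	int nr = syscall_get_nr(task, regs);

	/* On 64-bit this reads di, si, dx, r10, r8, r9; for a compat
	 * (TS_COMPAT) task it reads bx, cx, dx, si, di, bp instead.
	 */
	syscall_get_arguments(task, regs, args);

	if (nr == __NR_openat)
		pr_debug("openat(dfd=%ld, flags=%ld)\n",
			 (long)args[0], (long)args[2]);
}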
// SPDX-License-Identifier: GPL-2.0-or-later
/* RxRPC security handling
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/udp.h>
#include <linux/crypto.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <keys/rxrpc-type.h>
#include "ar-internal.h"

static const struct rxrpc_security *rxrpc_security_types[] = {
	[RXRPC_SECURITY_NONE]	= &rxrpc_no_security,
#ifdef CONFIG_RXKAD
	[RXRPC_SECURITY_RXKAD]	= &rxkad,
#endif
#ifdef CONFIG_RXGK
	[RXRPC_SECURITY_YFS_RXGK] = &rxgk_yfs,
#endif
};

int __init rxrpc_init_security(void)
{
	int i, ret;

	for (i = 0; i < ARRAY_SIZE(rxrpc_security_types); i++) {
		if (rxrpc_security_types[i]) {
			ret = rxrpc_security_types[i]->init();
			if (ret < 0)
				goto failed;
		}
	}

	return 0;

failed:
	for (i--; i >= 0; i--)
		if (rxrpc_security_types[i])
			rxrpc_security_types[i]->exit();
	return ret;
}

void rxrpc_exit_security(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(rxrpc_security_types); i++)
		if (rxrpc_security_types[i])
			rxrpc_security_types[i]->exit();
}

/*
 * look up an rxrpc security module
 */
const struct rxrpc_security *rxrpc_security_lookup(u8 security_index)
{
	if (security_index >= ARRAY_SIZE(rxrpc_security_types))
		return NULL;
	return rxrpc_security_types[security_index];
}

/*
 * Initialise the security on a client call.
 */
int rxrpc_init_client_call_security(struct rxrpc_call *call)
{
	const struct rxrpc_security *sec = &rxrpc_no_security;
	struct rxrpc_key_token *token;
	struct key *key = call->key;
	int ret;

	if (!key)
		goto found;

	ret = key_validate(key);
	if (ret < 0)
		return ret;

	for (token = key->payload.data[0]; token; token = token->next) {
		sec = rxrpc_security_lookup(token->security_index);
		if (sec)
			goto found;
	}
	return -EKEYREJECTED;

found:
	call->security = sec;
	call->security_ix = sec->security_index;
	return 0;
}

/*
 * initialise the security on a client connection
 */
int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
{
	struct rxrpc_key_token *token;
	struct key *key = conn->key;
	int ret = 0;

	_enter("{%d},{%x}", conn->debug_id, key_serial(key));

	for (token = key->payload.data[0]; token; token = token->next) {
		if (token->security_index == conn->security->security_index)
			goto found;
	}
	return -EKEYREJECTED;

found:
	mutex_lock(&conn->security_lock);
	if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) {
		ret = conn->security->init_connection_security(conn, token);
		if (ret == 0) {
			spin_lock_irq(&conn->state_lock);
			if (conn->state == RXRPC_CONN_CLIENT_UNSECURED)
				conn->state = RXRPC_CONN_CLIENT;
			spin_unlock_irq(&conn->state_lock);
		}
	}
	mutex_unlock(&conn->security_lock);
	return ret;
}

/*
 * Set the ops on a server connection.
*/ const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx, struct sk_buff *skb) { const struct rxrpc_security *sec; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); _enter(""); sec = rxrpc_security_lookup(sp->hdr.securityIndex); if (!sec) { rxrpc_direct_conn_abort(skb, rxrpc_abort_unsupported_security, RX_INVALID_OPERATION, -EKEYREJECTED); return NULL; } if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE && !rx->securities) { rxrpc_direct_conn_abort(skb, rxrpc_abort_no_service_key, sec->no_key_abort, -EKEYREJECTED); return NULL; } return sec; } /* * Find the security key for a server connection. */ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn, struct sk_buff *skb, u32 kvno, u32 enctype) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_sock *rx; struct key *key = ERR_PTR(-EKEYREJECTED); key_ref_t kref = NULL; char kdesc[5 + 1 + 3 + 1 + 12 + 1 + 12 + 1]; int ret; _enter(""); if (enctype) sprintf(kdesc, "%u:%u:%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex, kvno, enctype); else if (kvno) sprintf(kdesc, "%u:%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex, kvno); else sprintf(kdesc, "%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex); read_lock(&conn->local->services_lock); rx = conn->local->service; if (!rx) goto out; /* look through the service's keyring */ kref = keyring_search(make_key_ref(rx->securities, 1UL), &key_type_rxrpc_s, kdesc, true); if (IS_ERR(kref)) { key = ERR_CAST(kref); goto out; } key = key_ref_to_ptr(kref); ret = key_validate(key); if (ret < 0) { key_put(key); key = ERR_PTR(ret); goto out; } out: read_unlock(&conn->local->services_lock); return key; }
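rxrpc_security_types[] above is a sparse table indexed by the on-wire security index: entries are conditionally compiled in, every registered ->init() runs from rxrpc_init_security(), and rxrpc_security_lookup() simply bounds-checks the index. The following sketch is purely illustrative of how an additional entry would slot into that table; RXRPC_SECURITY_EXAMPLE and rxrpc_example_sec are invented names, not part of the rxrpc code, and only the ops actually referenced in this file are shown.

/* Illustrative only: a hypothetical extra security class. */
static int rxrpc_example_init(void) { return 0; }
static void rxrpc_example_exit(void) { }

static const struct rxrpc_security rxrpc_example_sec = {
	.security_index	= RXRPC_SECURITY_EXAMPLE,	/* invented index */
	.init		= rxrpc_example_init,
	.exit		= rxrpc_example_exit,
};

/* Adding "[RXRPC_SECURITY_EXAMPLE] = &rxrpc_example_sec," to
 * rxrpc_security_types[] would make it reachable through
 * rxrpc_security_lookup(RXRPC_SECURITY_EXAMPLE), with its ->init()
 * called from rxrpc_init_security().
 */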
/* SPDX-License-Identifier: GPL-2.0
 *
 * page_pool/helpers.h
 *	Author:	Jesper Dangaard Brouer <netoptimizer@brouer.com>
 *	Copyright (C) 2016 Red Hat, Inc.
 */

/**
 * DOC: page_pool allocator
 *
 * The page_pool allocator is optimized for recycling page or page fragment used
 * by skb packet and xdp frame.
 *
 * Basic use involves replacing any alloc_pages() calls with page_pool_alloc(),
 * which allocate memory with or without page splitting depending on the
 * requested memory size.
 *
 * If the driver knows that it always requires full pages or its allocations are
 * always smaller than half a page, it can use one of the more specific API
 * calls:
 *
 * 1. page_pool_alloc_pages(): allocate memory without page splitting when
 * driver knows that the memory it need is always bigger than half of the page
 * allocated from page pool. There is no cache line dirtying for 'struct page'
 * when a page is recycled back to the page pool.
 *
 * 2. page_pool_alloc_frag(): allocate memory with page splitting when driver
 * knows that the memory it need is always smaller than or equal to half of the
 * page allocated from page pool. Page splitting enables memory saving and thus
 * avoids TLB/cache miss for data access, but there also is some cost to
 * implement page splitting, mainly some cache line dirtying/bouncing for
 * 'struct page' and atomic operation for page->pp_ref_count.
* * The API keeps track of in-flight pages, in order to let API users know when * it is safe to free a page_pool object, the API users must call * page_pool_put_page() or page_pool_free_va() to free the page_pool object, or * attach the page_pool object to a page_pool-aware object like skbs marked with * skb_mark_for_recycle(). * * page_pool_put_page() may be called multiple times on the same page if a page * is split into multiple fragments. For the last fragment, it will either * recycle the page, or in case of page->_refcount > 1, it will release the DMA * mapping and in-flight state accounting. * * dma_sync_single_range_for_device() is only called for the last fragment when * page_pool is created with PP_FLAG_DMA_SYNC_DEV flag, so it depends on the * last freed fragment to do the sync_for_device operation for all fragments in * the same page when a page is split. The API user must setup pool->p.max_len * and pool->p.offset correctly and ensure that page_pool_put_page() is called * with dma_sync_size being -1 for fragment API. */ #ifndef _NET_PAGE_POOL_HELPERS_H #define _NET_PAGE_POOL_HELPERS_H #include <linux/dma-mapping.h> #include <net/page_pool/types.h> #include <net/net_debug.h> #include <net/netmem.h> #ifdef CONFIG_PAGE_POOL_STATS /* Deprecated driver-facing API, use netlink instead */ int page_pool_ethtool_stats_get_count(void); u8 *page_pool_ethtool_stats_get_strings(u8 *data); u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats); bool page_pool_get_stats(const struct page_pool *pool, struct page_pool_stats *stats); #else static inline int page_pool_ethtool_stats_get_count(void) { return 0; } static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data) { return data; } static inline u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats) { return data; } #endif /** * page_pool_dev_alloc_pages() - allocate a page. * @pool: pool from which to allocate * * Get a page from the page allocator or page_pool caches. */ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) { gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); return page_pool_alloc_pages(pool, gfp); } /** * page_pool_dev_alloc_frag() - allocate a page fragment. * @pool: pool from which to allocate * @offset: offset to the allocated page * @size: requested size * * Get a page fragment from the page allocator or page_pool caches. * * Return: allocated page fragment, otherwise return NULL. */ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int size) { gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); return page_pool_alloc_frag(pool, offset, size, gfp); } static inline netmem_ref page_pool_alloc_netmem(struct page_pool *pool, unsigned int *offset, unsigned int *size, gfp_t gfp) { unsigned int max_size = PAGE_SIZE << pool->p.order; netmem_ref netmem; if ((*size << 1) > max_size) { *size = max_size; *offset = 0; return page_pool_alloc_netmems(pool, gfp); } netmem = page_pool_alloc_frag_netmem(pool, offset, *size, gfp); if (unlikely(!netmem)) return 0; /* There is very likely not enough space for another fragment, so append * the remaining size to the current fragment to avoid truesize * underestimate problem. 
*/ if (pool->frag_offset + *size > max_size) { *size = max_size - *offset; pool->frag_offset = max_size; } return netmem; } static inline netmem_ref page_pool_dev_alloc_netmem(struct page_pool *pool, unsigned int *offset, unsigned int *size) { gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; return page_pool_alloc_netmem(pool, offset, size, gfp); } static inline struct page *page_pool_alloc(struct page_pool *pool, unsigned int *offset, unsigned int *size, gfp_t gfp) { return netmem_to_page(page_pool_alloc_netmem(pool, offset, size, gfp)); } /** * page_pool_dev_alloc() - allocate a page or a page fragment. * @pool: pool from which to allocate * @offset: offset to the allocated page * @size: in as the requested size, out as the allocated size * * Get a page or a page fragment from the page allocator or page_pool caches * depending on the requested size in order to allocate memory with least memory * utilization and performance penalty. * * Return: allocated page or page fragment, otherwise return NULL. */ static inline struct page *page_pool_dev_alloc(struct page_pool *pool, unsigned int *offset, unsigned int *size) { gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); return page_pool_alloc(pool, offset, size, gfp); } static inline void *page_pool_alloc_va(struct page_pool *pool, unsigned int *size, gfp_t gfp) { unsigned int offset; struct page *page; /* Mask off __GFP_HIGHMEM to ensure we can use page_address() */ page = page_pool_alloc(pool, &offset, size, gfp & ~__GFP_HIGHMEM); if (unlikely(!page)) return NULL; return page_address(page) + offset; } /** * page_pool_dev_alloc_va() - allocate a page or a page fragment and return its * va. * @pool: pool from which to allocate * @size: in as the requested size, out as the allocated size * * This is just a thin wrapper around the page_pool_alloc() API, and * it returns va of the allocated page or page fragment. * * Return: the va for the allocated page or page fragment, otherwise return NULL. */ static inline void *page_pool_dev_alloc_va(struct page_pool *pool, unsigned int *size) { gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); return page_pool_alloc_va(pool, size, gfp); } /** * page_pool_get_dma_dir() - Retrieve the stored DMA direction. * @pool: pool from which page was allocated * * Get the stored dma direction. A driver might decide to store this locally * and avoid the extra cache line from page_pool to determine the direction. */ static inline enum dma_data_direction page_pool_get_dma_dir(const struct page_pool *pool) { return pool->p.dma_dir; } static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr) { atomic_long_set(netmem_get_pp_ref_count_ref(netmem), nr); } /** * page_pool_fragment_page() - split a fresh page into fragments * @page: page to split * @nr: references to set * * pp_ref_count represents the number of outstanding references to the page, * which will be freed using page_pool APIs (rather than page allocator APIs * like put_page()). Such references are usually held by page_pool-aware * objects like skbs marked for page pool recycling. * * This helper allows the caller to take (set) multiple references to a * freshly allocated page. The page must be freshly allocated (have a * pp_ref_count of 1). This is commonly done by drivers and * "fragment allocators" to save atomic operations - either when they know * upfront how many references they will need; or to take MAX references and * return the unused ones with a single atomic dec(), instead of performing * multiple atomic inc() operations. 
*/ static inline void page_pool_fragment_page(struct page *page, long nr) { page_pool_fragment_netmem(page_to_netmem(page), nr); } static inline long page_pool_unref_netmem(netmem_ref netmem, long nr) { atomic_long_t *pp_ref_count = netmem_get_pp_ref_count_ref(netmem); long ret; /* If nr == pp_ref_count then we have cleared all remaining * references to the page: * 1. 'n == 1': no need to actually overwrite it. * 2. 'n != 1': overwrite it with one, which is the rare case * for pp_ref_count draining. * * The main advantage to doing this is that not only we avoid a atomic * update, as an atomic_read is generally a much cheaper operation than * an atomic update, especially when dealing with a page that may be * referenced by only 2 or 3 users; but also unify the pp_ref_count * handling by ensuring all pages have partitioned into only 1 piece * initially, and only overwrite it when the page is partitioned into * more than one piece. */ if (atomic_long_read(pp_ref_count) == nr) { /* As we have ensured nr is always one for constant case using * the BUILD_BUG_ON(), only need to handle the non-constant case * here for pp_ref_count draining, which is a rare case. */ BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1); if (!__builtin_constant_p(nr)) atomic_long_set(pp_ref_count, 1); return 0; } ret = atomic_long_sub_return(nr, pp_ref_count); WARN_ON(ret < 0); /* We are the last user here too, reset pp_ref_count back to 1 to * ensure all pages have been partitioned into 1 piece initially, * this should be the rare case when the last two fragment users call * page_pool_unref_page() currently. */ if (unlikely(!ret)) atomic_long_set(pp_ref_count, 1); return ret; } static inline long page_pool_unref_page(struct page *page, long nr) { return page_pool_unref_netmem(page_to_netmem(page), nr); } static inline void page_pool_ref_netmem(netmem_ref netmem) { atomic_long_inc(netmem_get_pp_ref_count_ref(netmem)); } static inline void page_pool_ref_page(struct page *page) { page_pool_ref_netmem(page_to_netmem(page)); } static inline bool page_pool_unref_and_test(netmem_ref netmem) { /* If page_pool_unref_page() returns 0, we were the last user */ return page_pool_unref_netmem(netmem, 1) == 0; } static inline void page_pool_put_netmem(struct page_pool *pool, netmem_ref netmem, unsigned int dma_sync_size, bool allow_direct) { /* When page_pool isn't compiled-in, net/core/xdp.c doesn't * allow registering MEM_TYPE_PAGE_POOL, but shield linker. */ #ifdef CONFIG_PAGE_POOL if (!page_pool_unref_and_test(netmem)) return; page_pool_put_unrefed_netmem(pool, netmem, dma_sync_size, allow_direct); #endif } /** * page_pool_put_page() - release a reference to a page pool page * @pool: pool from which page was allocated * @page: page to release a reference on * @dma_sync_size: how much of the page may have been touched by the device * @allow_direct: released by the consumer, allow lockless caching * * The outcome of this depends on the page refcnt. If the driver bumps * the refcnt > 1 this will unmap the page. If the page refcnt is 1 * the allocator owns the page and will try to recycle it in one of the pool * caches. If PP_FLAG_DMA_SYNC_DEV is set, the page will be synced for_device * using dma_sync_single_range_for_device(). 
*/ static inline void page_pool_put_page(struct page_pool *pool, struct page *page, unsigned int dma_sync_size, bool allow_direct) { page_pool_put_netmem(pool, page_to_netmem(page), dma_sync_size, allow_direct); } static inline void page_pool_put_full_netmem(struct page_pool *pool, netmem_ref netmem, bool allow_direct) { page_pool_put_netmem(pool, netmem, -1, allow_direct); } /** * page_pool_put_full_page() - release a reference on a page pool page * @pool: pool from which page was allocated * @page: page to release a reference on * @allow_direct: released by the consumer, allow lockless caching * * Similar to page_pool_put_page(), but will DMA sync the entire memory area * as configured in &page_pool_params.max_len. */ static inline void page_pool_put_full_page(struct page_pool *pool, struct page *page, bool allow_direct) { page_pool_put_netmem(pool, page_to_netmem(page), -1, allow_direct); } /** * page_pool_recycle_direct() - release a reference on a page pool page * @pool: pool from which page was allocated * @page: page to release a reference on * * Similar to page_pool_put_full_page() but caller must guarantee safe context * (e.g NAPI), since it will recycle the page directly into the pool fast cache. */ static inline void page_pool_recycle_direct(struct page_pool *pool, struct page *page) { page_pool_put_full_page(pool, page, true); } static inline void page_pool_recycle_direct_netmem(struct page_pool *pool, netmem_ref netmem) { page_pool_put_full_netmem(pool, netmem, true); } #define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \ (sizeof(dma_addr_t) > sizeof(unsigned long)) /** * page_pool_free_va() - free a va into the page_pool * @pool: pool from which va was allocated * @va: va to be freed * @allow_direct: freed by the consumer, allow lockless caching * * Free a va allocated from page_pool_allo_va(). */ static inline void page_pool_free_va(struct page_pool *pool, void *va, bool allow_direct) { page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct); } static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem) { dma_addr_t ret = netmem_get_dma_addr(netmem); if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) ret <<= PAGE_SHIFT; return ret; } /** * page_pool_get_dma_addr() - Retrieve the stored DMA address. * @page: page allocated from a page pool * * Fetch the DMA address of the page. The page pool to which the page belongs * must had been created with PP_FLAG_DMA_MAP. */ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { dma_addr_t ret = page->dma_addr; if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) ret <<= PAGE_SHIFT; return ret; } static inline void __page_pool_dma_sync_for_cpu(const struct page_pool *pool, const dma_addr_t dma_addr, u32 offset, u32 dma_sync_size) { dma_sync_single_range_for_cpu(pool->p.dev, dma_addr, offset + pool->p.offset, dma_sync_size, page_pool_get_dma_dir(pool)); } /** * page_pool_dma_sync_for_cpu - sync Rx page for CPU after it's written by HW * @pool: &page_pool the @page belongs to * @page: page to sync * @offset: offset from page start to "hard" start if using PP frags * @dma_sync_size: size of the data written to the page * * Can be used as a shorthand to sync Rx pages before accessing them in the * driver. Caller must ensure the pool was created with ``PP_FLAG_DMA_MAP``. * Note that this version performs DMA sync unconditionally, even if the * associated PP doesn't perform sync-for-device. 
*/ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, const struct page *page, u32 offset, u32 dma_sync_size) { __page_pool_dma_sync_for_cpu(pool, page_pool_get_dma_addr(page), offset, dma_sync_size); } static inline void page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool, const netmem_ref netmem, u32 offset, u32 dma_sync_size) { if (!pool->dma_sync_for_cpu) return; __page_pool_dma_sync_for_cpu(pool, page_pool_get_dma_addr_netmem(netmem), offset, dma_sync_size); } static inline bool page_pool_put(struct page_pool *pool) { return refcount_dec_and_test(&pool->user_cnt); } static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) { if (unlikely(pool->p.nid != new_nid)) page_pool_update_nid(pool, new_nid); } static inline bool page_pool_is_unreadable(struct page_pool *pool) { return !!pool->mp_ops; } #endif /* _NET_PAGE_POOL_HELPERS_H */
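The DOC comment at the top of this header describes the intended calling pattern: allocate through one of the page_pool_*alloc* helpers and hand every reference back through a page_pool_put_*() variant (or an skb marked for recycling). Below is a minimal, hypothetical driver-side sketch using only helpers declared above; my_rx_refill is an invented name, and a real driver would post the page to its RX ring rather than returning it immediately.

/* Hypothetical sketch of the allocate/return cycle described in the
 * DOC comment; not taken from any real driver.
 */
#include <linux/errno.h>
#include <net/page_pool/helpers.h>

static int my_rx_refill(struct page_pool *pool)
{
	unsigned int offset;
	unsigned int size = 2048;	/* in: requested size, out: granted size */
	struct page *page;

	/* Falls back to a full page when the request is larger than half a
	 * page; otherwise returns a fragment and updates offset/size.
	 */
	page = page_pool_dev_alloc(pool, &offset, &size);
	if (!page)
		return -ENOMEM;

	/* ... a real driver would post page + offset to hardware here ... */

	/* Return the reference; the "full" variant DMA-syncs up to
	 * pool->p.max_len when the pool uses PP_FLAG_DMA_SYNC_DEV.
	 */
	page_pool_put_full_page(pool, page, false);
	return 0;
}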
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/export.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <linux/filter.h>
#include <net/dsa.h>
#include <net/dst_metadata.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/gre.h>
#include <net/pptp.h>
#include <net/tipc.h>
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
#include <linux/dccp.h>
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <linux/stddef.h>
#include <linux/if_ether.h>
#include 
<linux/if_hsr.h> #include <linux/mpls.h> #include <linux/tcp.h> #include <linux/ptp_classify.h> #include <net/flow_dissector.h> #include <net/pkt_cls.h> #include <scsi/fc/fc_fcoe.h> #include <uapi/linux/batadv_packet.h> #include <linux/bpf.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_labels.h> #endif #include <linux/bpf-netns.h> static void dissector_set_key(struct flow_dissector *flow_dissector, enum flow_dissector_key_id key_id) { flow_dissector->used_keys |= (1ULL << key_id); } void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, unsigned int key_count) { unsigned int i; memset(flow_dissector, 0, sizeof(*flow_dissector)); for (i = 0; i < key_count; i++, key++) { /* User should make sure that every key target offset is within * boundaries of unsigned short. */ BUG_ON(key->offset > USHRT_MAX); BUG_ON(dissector_uses_key(flow_dissector, key->key_id)); dissector_set_key(flow_dissector, key->key_id); flow_dissector->offset[key->key_id] = key->offset; } /* Ensure that the dissector always includes control and basic key. * That way we are able to avoid handling lack of these in fast path. */ BUG_ON(!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_CONTROL)); BUG_ON(!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_BASIC)); } EXPORT_SYMBOL(skb_flow_dissector_init); #ifdef CONFIG_BPF_SYSCALL int flow_dissector_bpf_prog_attach_check(struct net *net, struct bpf_prog *prog) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; if (net == &init_net) { /* BPF flow dissector in the root namespace overrides * any per-net-namespace one. When attaching to root, * make sure we don't have any BPF program attached * to the non-root namespaces. */ struct net *ns; for_each_net(ns) { if (ns == &init_net) continue; if (rcu_access_pointer(ns->bpf.run_array[type])) return -EEXIST; } } else { /* Make sure root flow dissector is not attached * when attaching to the non-root namespace. 
*/ if (rcu_access_pointer(init_net.bpf.run_array[type])) return -EEXIST; } return 0; } #endif /* CONFIG_BPF_SYSCALL */ /** * skb_flow_get_ports - extract the upper layer ports and return them * @skb: sk_buff to extract the ports from * @thoff: transport header offset * @ip_proto: protocol for which to get port offset * @data: raw buffer pointer to the packet, if NULL use skb->data * @hlen: packet header length, if @data is NULL use skb_headlen(skb) * * The function will try to retrieve the ports at offset thoff + poff where poff * is the protocol port offset returned from proto_ports_offset */ __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, const void *data, int hlen) { int poff = proto_ports_offset(ip_proto); if (!data) { data = skb->data; hlen = skb_headlen(skb); } if (poff >= 0) { __be32 *ports, _ports; ports = __skb_header_pointer(skb, thoff + poff, sizeof(_ports), data, hlen, &_ports); if (ports) return *ports; } return 0; } EXPORT_SYMBOL(skb_flow_get_ports); static bool icmp_has_id(u8 type) { switch (type) { case ICMP_ECHO: case ICMP_ECHOREPLY: case ICMP_TIMESTAMP: case ICMP_TIMESTAMPREPLY: case ICMPV6_ECHO_REQUEST: case ICMPV6_ECHO_REPLY: return true; } return false; } /** * skb_flow_get_icmp_tci - extract ICMP(6) Type, Code and Identifier fields * @skb: sk_buff to extract from * @key_icmp: struct flow_dissector_key_icmp to fill * @data: raw buffer pointer to the packet * @thoff: offset to extract at * @hlen: packet header length */ void skb_flow_get_icmp_tci(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, const void *data, int thoff, int hlen) { struct icmphdr *ih, _ih; ih = __skb_header_pointer(skb, thoff, sizeof(_ih), data, hlen, &_ih); if (!ih) return; key_icmp->type = ih->type; key_icmp->code = ih->code; /* As we use 0 to signal that the Id field is not present, * avoid confusion with packets without such field */ if (icmp_has_id(ih->type)) key_icmp->id = ih->un.echo.id ? ntohs(ih->un.echo.id) : 1; else key_icmp->id = 0; } EXPORT_SYMBOL(skb_flow_get_icmp_tci); /* If FLOW_DISSECTOR_KEY_ICMP is set, dissect an ICMP packet * using skb_flow_get_icmp_tci(). 
*/ static void __skb_flow_dissect_icmp(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, int thoff, int hlen) { struct flow_dissector_key_icmp *key_icmp; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ICMP)) return; key_icmp = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ICMP, target_container); skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen); } static void __skb_flow_dissect_ah(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, int nhoff, int hlen) { struct flow_dissector_key_ipsec *key_ah; struct ip_auth_hdr _hdr, *hdr; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPSEC)) return; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return; key_ah = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_IPSEC, target_container); key_ah->spi = hdr->spi; } static void __skb_flow_dissect_esp(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, int nhoff, int hlen) { struct flow_dissector_key_ipsec *key_esp; struct ip_esp_hdr _hdr, *hdr; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPSEC)) return; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return; key_esp = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_IPSEC, target_container); key_esp->spi = hdr->spi; } static void __skb_flow_dissect_l2tpv3(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, int nhoff, int hlen) { struct flow_dissector_key_l2tpv3 *key_l2tpv3; struct { __be32 session_id; } *hdr, _hdr; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_L2TPV3)) return; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return; key_l2tpv3 = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_L2TPV3, target_container); key_l2tpv3->session_id = hdr->session_id; } void skb_flow_dissect_meta(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container) { struct flow_dissector_key_meta *meta; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_META)) return; meta = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_META, target_container); meta->ingress_ifindex = skb->skb_iif; #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) if (tc_skb_ext_tc_enabled()) { struct tc_skb_ext *ext; ext = skb_ext_find(skb, TC_SKB_EXT); if (ext) meta->l2_miss = ext->l2_miss; } #endif } EXPORT_SYMBOL(skb_flow_dissect_meta); static void skb_flow_dissect_set_enc_control(enum flow_dissector_key_id type, u32 ctrl_flags, struct flow_dissector *flow_dissector, void *target_container) { struct flow_dissector_key_control *ctrl; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) return; ctrl = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL, target_container); ctrl->addr_type = type; ctrl->flags = ctrl_flags; } void skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, u16 *ctinfo_map, size_t mapsize, bool post_ct, u16 zone) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) struct flow_dissector_key_ct *key; enum ip_conntrack_info ctinfo; struct nf_conn_labels *cl; struct nf_conn *ct; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_CT)) return; ct = nf_ct_get(skb, &ctinfo); if (!ct && !post_ct) return; key = 
skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_CT, target_container); if (!ct) { key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED | TCA_FLOWER_KEY_CT_FLAGS_INVALID; key->ct_zone = zone; return; } if (ctinfo < mapsize) key->ct_state = ctinfo_map[ctinfo]; #if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) key->ct_zone = ct->zone.id; #endif #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) key->ct_mark = READ_ONCE(ct->mark); #endif cl = nf_ct_labels_find(ct); if (cl) memcpy(key->ct_labels, cl->bits, sizeof(key->ct_labels)); #endif /* CONFIG_NF_CONNTRACK */ } EXPORT_SYMBOL(skb_flow_dissect_ct); void skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container) { struct ip_tunnel_info *info; struct ip_tunnel_key *key; u32 ctrl_flags = 0; /* A quick check to see if there might be something to do. */ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) && !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) && !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) && !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL) && !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) && !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP) && !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) return; info = skb_tunnel_info(skb); if (!info) return; key = &info->key; if (test_bit(IP_TUNNEL_CSUM_BIT, key->tun_flags)) ctrl_flags |= FLOW_DIS_F_TUNNEL_CSUM; if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags)) ctrl_flags |= FLOW_DIS_F_TUNNEL_DONT_FRAGMENT; if (test_bit(IP_TUNNEL_OAM_BIT, key->tun_flags)) ctrl_flags |= FLOW_DIS_F_TUNNEL_OAM; if (test_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_flags)) ctrl_flags |= FLOW_DIS_F_TUNNEL_CRIT_OPT; switch (ip_tunnel_info_af(info)) { case AF_INET: skb_flow_dissect_set_enc_control(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ctrl_flags, flow_dissector, target_container); if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { struct flow_dissector_key_ipv4_addrs *ipv4; ipv4 = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, target_container); ipv4->src = key->u.ipv4.src; ipv4->dst = key->u.ipv4.dst; } break; case AF_INET6: skb_flow_dissect_set_enc_control(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ctrl_flags, flow_dissector, target_container); if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { struct flow_dissector_key_ipv6_addrs *ipv6; ipv6 = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, target_container); ipv6->src = key->u.ipv6.src; ipv6->dst = key->u.ipv6.dst; } break; default: skb_flow_dissect_set_enc_control(0, ctrl_flags, flow_dissector, target_container); break; } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { struct flow_dissector_key_keyid *keyid; keyid = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID, target_container); keyid->keyid = tunnel_id_to_key32(key->tun_id); } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { struct flow_dissector_key_ports *tp; tp = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS, target_container); tp->src = key->tp_src; tp->dst = key->tp_dst; } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { struct flow_dissector_key_ip *ip; ip = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP, target_container); ip->tos = key->tos; ip->ttl = key->ttl; } if 
(dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) { struct flow_dissector_key_enc_opts *enc_opt; IP_TUNNEL_DECLARE_FLAGS(flags) = { }; u32 val; enc_opt = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS, target_container); if (!info->options_len) return; enc_opt->len = info->options_len; ip_tunnel_info_opts_get(enc_opt->data, info); ip_tunnel_set_options_present(flags); ip_tunnel_flags_and(flags, info->key.tun_flags, flags); val = find_next_bit(flags, __IP_TUNNEL_FLAG_NUM, IP_TUNNEL_GENEVE_OPT_BIT); enc_opt->dst_opt_type = val < __IP_TUNNEL_FLAG_NUM ? val : 0; } } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); void skb_flow_dissect_hash(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container) { struct flow_dissector_key_hash *key; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_HASH)) return; key = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_HASH, target_container); key->hash = skb_get_hash_raw(skb); } EXPORT_SYMBOL(skb_flow_dissect_hash); static enum flow_dissect_ret __skb_flow_dissect_mpls(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, int nhoff, int hlen, int lse_index, bool *entropy_label) { struct mpls_label *hdr, _hdr; u32 entry, label, bos; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS_ENTROPY) && !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) return FLOW_DISSECT_RET_OUT_GOOD; if (lse_index >= FLOW_DIS_MPLS_MAX) return FLOW_DISSECT_RET_OUT_GOOD; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return FLOW_DISSECT_RET_OUT_BAD; entry = ntohl(hdr->entry); label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT; if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) { struct flow_dissector_key_mpls *key_mpls; struct flow_dissector_mpls_lse *lse; key_mpls = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_MPLS, target_container); lse = &key_mpls->ls[lse_index]; lse->mpls_ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; lse->mpls_bos = bos; lse->mpls_tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT; lse->mpls_label = label; dissector_set_mpls_lse(key_mpls, lse_index); } if (*entropy_label && dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) { struct flow_dissector_key_keyid *key_keyid; key_keyid = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_MPLS_ENTROPY, target_container); key_keyid->keyid = cpu_to_be32(label); } *entropy_label = label == MPLS_LABEL_ENTROPY; return bos ? 
FLOW_DISSECT_RET_OUT_GOOD : FLOW_DISSECT_RET_PROTO_AGAIN; } static enum flow_dissect_ret __skb_flow_dissect_arp(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, int nhoff, int hlen) { struct flow_dissector_key_arp *key_arp; struct { unsigned char ar_sha[ETH_ALEN]; unsigned char ar_sip[4]; unsigned char ar_tha[ETH_ALEN]; unsigned char ar_tip[4]; } *arp_eth, _arp_eth; const struct arphdr *arp; struct arphdr _arp; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP)) return FLOW_DISSECT_RET_OUT_GOOD; arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data, hlen, &_arp); if (!arp) return FLOW_DISSECT_RET_OUT_BAD; if (arp->ar_hrd != htons(ARPHRD_ETHER) || arp->ar_pro != htons(ETH_P_IP) || arp->ar_hln != ETH_ALEN || arp->ar_pln != 4 || (arp->ar_op != htons(ARPOP_REPLY) && arp->ar_op != htons(ARPOP_REQUEST))) return FLOW_DISSECT_RET_OUT_BAD; arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp), sizeof(_arp_eth), data, hlen, &_arp_eth); if (!arp_eth) return FLOW_DISSECT_RET_OUT_BAD; key_arp = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ARP, target_container); memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip)); memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip)); /* Only store the lower byte of the opcode; * this covers ARPOP_REPLY and ARPOP_REQUEST. */ key_arp->op = ntohs(arp->ar_op) & 0xff; ether_addr_copy(key_arp->sha, arp_eth->ar_sha); ether_addr_copy(key_arp->tha, arp_eth->ar_tha); return FLOW_DISSECT_RET_OUT_GOOD; } static enum flow_dissect_ret __skb_flow_dissect_cfm(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, int nhoff, int hlen) { struct flow_dissector_key_cfm *key, *hdr, _hdr; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_CFM)) return FLOW_DISSECT_RET_OUT_GOOD; hdr = __skb_header_pointer(skb, nhoff, sizeof(*key), data, hlen, &_hdr); if (!hdr) return FLOW_DISSECT_RET_OUT_BAD; key = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_CFM, target_container); key->mdl_ver = hdr->mdl_ver; key->opcode = hdr->opcode; return FLOW_DISSECT_RET_OUT_GOOD; } static enum flow_dissect_ret __skb_flow_dissect_gre(const struct sk_buff *skb, struct flow_dissector_key_control *key_control, struct flow_dissector *flow_dissector, void *target_container, const void *data, __be16 *p_proto, int *p_nhoff, int *p_hlen, unsigned int flags) { struct flow_dissector_key_keyid *key_keyid; struct gre_base_hdr *hdr, _hdr; int offset = 0; u16 gre_ver; hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), data, *p_hlen, &_hdr); if (!hdr) return FLOW_DISSECT_RET_OUT_BAD; /* Only look inside GRE without routing */ if (hdr->flags & GRE_ROUTING) return FLOW_DISSECT_RET_OUT_GOOD; /* Only look inside GRE for version 0 and 1 */ gre_ver = ntohs(hdr->flags & GRE_VERSION); if (gre_ver > 1) return FLOW_DISSECT_RET_OUT_GOOD; *p_proto = hdr->protocol; if (gre_ver) { /* Version1 must be PPTP, and check the flags */ if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) return FLOW_DISSECT_RET_OUT_GOOD; } offset += sizeof(struct gre_base_hdr); if (hdr->flags & GRE_CSUM) offset += sizeof_field(struct gre_full_hdr, csum) + sizeof_field(struct gre_full_hdr, reserved1); if (hdr->flags & GRE_KEY) { const __be32 *keyid; __be32 _keyid; keyid = __skb_header_pointer(skb, *p_nhoff + offset, sizeof(_keyid), data, *p_hlen, &_keyid); if (!keyid) return FLOW_DISSECT_RET_OUT_BAD; if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_GRE_KEYID)) { 
key_keyid = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_GRE_KEYID, target_container); if (gre_ver == 0) key_keyid->keyid = *keyid; else key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; } offset += sizeof_field(struct gre_full_hdr, key); } if (hdr->flags & GRE_SEQ) offset += sizeof_field(struct pptp_gre_header, seq); if (gre_ver == 0) { if (*p_proto == htons(ETH_P_TEB)) { const struct ethhdr *eth; struct ethhdr _eth; eth = __skb_header_pointer(skb, *p_nhoff + offset, sizeof(_eth), data, *p_hlen, &_eth); if (!eth) return FLOW_DISSECT_RET_OUT_BAD; *p_proto = eth->h_proto; offset += sizeof(*eth); /* Cap headers that we access via pointers at the * end of the Ethernet header as our maximum alignment * at that point is only 2 bytes. */ if (NET_IP_ALIGN) *p_hlen = *p_nhoff + offset; } } else { /* version 1, must be PPTP */ u8 _ppp_hdr[PPP_HDRLEN]; u8 *ppp_hdr; if (hdr->flags & GRE_ACK) offset += sizeof_field(struct pptp_gre_header, ack); ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset, sizeof(_ppp_hdr), data, *p_hlen, _ppp_hdr); if (!ppp_hdr) return FLOW_DISSECT_RET_OUT_BAD; switch (PPP_PROTOCOL(ppp_hdr)) { case PPP_IP: *p_proto = htons(ETH_P_IP); break; case PPP_IPV6: *p_proto = htons(ETH_P_IPV6); break; default: /* Could probably catch some more like MPLS */ break; } offset += PPP_HDRLEN; } *p_nhoff += offset; key_control->flags |= FLOW_DIS_ENCAPSULATION; if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) return FLOW_DISSECT_RET_OUT_GOOD; return FLOW_DISSECT_RET_PROTO_AGAIN; } /** * __skb_flow_dissect_batadv() - dissect batman-adv header * @skb: sk_buff to with the batman-adv header * @key_control: flow dissectors control key * @data: raw buffer pointer to the packet, if NULL use skb->data * @p_proto: pointer used to update the protocol to process next * @p_nhoff: pointer used to update inner network header offset * @hlen: packet header length * @flags: any combination of FLOW_DISSECTOR_F_* * * ETH_P_BATMAN packets are tried to be dissected. Only * &struct batadv_unicast packets are actually processed because they contain an * inner ethernet header and are usually followed by actual network header. This * allows the flow dissector to continue processing the packet. 
* * Return: FLOW_DISSECT_RET_PROTO_AGAIN when &struct batadv_unicast was found, * FLOW_DISSECT_RET_OUT_GOOD when dissector should stop after encapsulation, * otherwise FLOW_DISSECT_RET_OUT_BAD */ static enum flow_dissect_ret __skb_flow_dissect_batadv(const struct sk_buff *skb, struct flow_dissector_key_control *key_control, const void *data, __be16 *p_proto, int *p_nhoff, int hlen, unsigned int flags) { struct { struct batadv_unicast_packet batadv_unicast; struct ethhdr eth; } *hdr, _hdr; hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return FLOW_DISSECT_RET_OUT_BAD; if (hdr->batadv_unicast.version != BATADV_COMPAT_VERSION) return FLOW_DISSECT_RET_OUT_BAD; if (hdr->batadv_unicast.packet_type != BATADV_UNICAST) return FLOW_DISSECT_RET_OUT_BAD; *p_proto = hdr->eth.h_proto; *p_nhoff += sizeof(*hdr); key_control->flags |= FLOW_DIS_ENCAPSULATION; if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) return FLOW_DISSECT_RET_OUT_GOOD; return FLOW_DISSECT_RET_PROTO_AGAIN; } static void __skb_flow_dissect_tcp(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, int thoff, int hlen) { struct flow_dissector_key_tcp *key_tcp; struct tcphdr *th, _th; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_TCP)) return; th = __skb_header_pointer(skb, thoff, sizeof(_th), data, hlen, &_th); if (!th) return; if (unlikely(__tcp_hdrlen(th) < sizeof(_th))) return; key_tcp = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_TCP, target_container); key_tcp->flags = (*(__be16 *) &tcp_flag_word(th) & htons(0x0FFF)); } static void __skb_flow_dissect_ports(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, int nhoff, u8 ip_proto, int hlen) { struct flow_dissector_key_ports_range *key_ports_range = NULL; struct flow_dissector_key_ports *key_ports = NULL; __be32 ports; if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS_RANGE)) key_ports_range = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS_RANGE, target_container); if (!key_ports && !key_ports_range) return; ports = skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen); if (key_ports) key_ports->ports = ports; if (key_ports_range) key_ports_range->tp.ports = ports; } static void __skb_flow_dissect_ipv4(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, const struct iphdr *iph) { struct flow_dissector_key_ip *key_ip; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP)) return; key_ip = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_IP, target_container); key_ip->tos = iph->tos; key_ip->ttl = iph->ttl; } static void __skb_flow_dissect_ipv6(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, const struct ipv6hdr *iph) { struct flow_dissector_key_ip *key_ip; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP)) return; key_ip = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_IP, target_container); key_ip->tos = ipv6_get_dsfield(iph); key_ip->ttl = iph->hop_limit; } /* Maximum number of protocol headers that can be parsed in * __skb_flow_dissect */ #define MAX_FLOW_DISSECT_HDRS 15 static bool skb_flow_dissect_allowed(int *num_hdrs) { ++*num_hdrs; return (*num_hdrs <= 
MAX_FLOW_DISSECT_HDRS); } static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, struct flow_dissector *flow_dissector, void *target_container) { struct flow_dissector_key_ports_range *key_ports_range = NULL; struct flow_dissector_key_ports *key_ports = NULL; struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_tags *key_tags; key_control = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_CONTROL, target_container); key_control->thoff = flow_keys->thoff; if (flow_keys->is_frag) key_control->flags |= FLOW_DIS_IS_FRAGMENT; if (flow_keys->is_first_frag) key_control->flags |= FLOW_DIS_FIRST_FRAG; if (flow_keys->is_encap) key_control->flags |= FLOW_DIS_ENCAPSULATION; key_basic = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_BASIC, target_container); key_basic->n_proto = flow_keys->n_proto; key_basic->ip_proto = flow_keys->ip_proto; if (flow_keys->addr_proto == ETH_P_IP && dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { key_addrs = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container); key_addrs->v4addrs.src = flow_keys->ipv4_src; key_addrs->v4addrs.dst = flow_keys->ipv4_dst; key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } else if (flow_keys->addr_proto == ETH_P_IPV6 && dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { key_addrs = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_IPV6_ADDRS, target_container); memcpy(&key_addrs->v6addrs.src, &flow_keys->ipv6_src, sizeof(key_addrs->v6addrs.src)); memcpy(&key_addrs->v6addrs.dst, &flow_keys->ipv6_dst, sizeof(key_addrs->v6addrs.dst)); key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); key_ports->src = flow_keys->sport; key_ports->dst = flow_keys->dport; } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS_RANGE)) { key_ports_range = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS_RANGE, target_container); key_ports_range->tp.src = flow_keys->sport; key_ports_range->tp.dst = flow_keys->dport; } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_FLOW_LABEL)) { key_tags = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_FLOW_LABEL, target_container); key_tags->flow_label = ntohl(flow_keys->flow_label); } } u32 bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, __be16 proto, int nhoff, int hlen, unsigned int flags) { struct bpf_flow_keys *flow_keys = ctx->flow_keys; u32 result; /* Pass parameters to the BPF program */ memset(flow_keys, 0, sizeof(*flow_keys)); flow_keys->n_proto = proto; flow_keys->nhoff = nhoff; flow_keys->thoff = flow_keys->nhoff; BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG != (int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG); BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL != (int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP != (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP); flow_keys->flags = flags; result = bpf_prog_run_pin_on_cpu(prog, ctx); flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen); flow_keys->thoff = clamp_t(u16, flow_keys->thoff, flow_keys->nhoff, hlen); return result; } static bool is_pppoe_ses_hdr_valid(const struct pppoe_hdr *hdr) { return hdr->ver == 1 && hdr->type == 1 
&& hdr->code == 0; } /** * __skb_flow_dissect - extract the flow_keys struct and return it * @net: associated network namespace, derived from @skb if NULL * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified * @flow_dissector: list of keys to dissect * @target_container: target structure to put dissected values into * @data: raw buffer pointer to the packet, if NULL use skb->data * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) * @hlen: packet header length, if @data is NULL use skb_headlen(skb) * @flags: flags that control the dissection process, e.g. * FLOW_DISSECTOR_F_STOP_AT_ENCAP. * * The function will try to retrieve individual keys into target specified * by flow_dissector from either the skbuff or a raw buffer specified by the * rest parameters. * * Caller must take care of zeroing target container memory. */ bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, __be16 proto, int nhoff, int hlen, unsigned int flags) { struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; bool mpls_el = false; int mpls_lse = 0; int num_hdrs = 0; u8 ip_proto = 0; bool ret; if (!data) { data = skb->data; proto = skb_vlan_tag_present(skb) ? skb->vlan_proto : skb->protocol; nhoff = skb_network_offset(skb); hlen = skb_headlen(skb); #if IS_ENABLED(CONFIG_NET_DSA) if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) && proto == htons(ETH_P_XDSA))) { struct metadata_dst *md_dst = skb_metadata_dst(skb); const struct dsa_device_ops *ops; int offset = 0; ops = skb->dev->dsa_ptr->tag_ops; /* Only DSA header taggers break flow dissection */ if (ops->needed_headroom && (!md_dst || md_dst->type != METADATA_HW_PORT_MUX)) { if (ops->flow_dissect) ops->flow_dissect(skb, &proto, &offset); else dsa_tag_generic_flow_dissect(skb, &proto, &offset); hlen -= offset; nhoff += offset; } } #endif } /* It is ensured by skb_flow_dissector_init() that control key will * be always present. */ key_control = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_CONTROL, target_container); /* It is ensured by skb_flow_dissector_init() that basic key will * be always present. 
*/ key_basic = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_BASIC, target_container); rcu_read_lock(); if (skb) { if (!net) { if (skb->dev) net = dev_net_rcu(skb->dev); else if (skb->sk) net = sock_net(skb->sk); } } DEBUG_NET_WARN_ON_ONCE(!net); if (net) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; struct bpf_prog_array *run_array; run_array = rcu_dereference(init_net.bpf.run_array[type]); if (!run_array) run_array = rcu_dereference(net->bpf.run_array[type]); if (run_array) { struct bpf_flow_keys flow_keys; struct bpf_flow_dissector ctx = { .flow_keys = &flow_keys, .data = data, .data_end = data + hlen, }; __be16 n_proto = proto; struct bpf_prog *prog; u32 result; if (skb) { ctx.skb = skb; /* we can't use 'proto' in the skb case * because it might be set to skb->vlan_proto * which has been pulled from the data */ n_proto = skb->protocol; } prog = READ_ONCE(run_array->items[0].prog); result = bpf_flow_dissect(prog, &ctx, n_proto, nhoff, hlen, flags); if (result != BPF_FLOW_DISSECTOR_CONTINUE) { __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); rcu_read_unlock(); return result == BPF_OK; } } } rcu_read_unlock(); if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct ethhdr *eth = eth_hdr(skb); struct flow_dissector_key_eth_addrs *key_eth_addrs; key_eth_addrs = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS, target_container); memcpy(key_eth_addrs, eth, sizeof(*key_eth_addrs)); } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_NUM_OF_VLANS)) { struct flow_dissector_key_num_of_vlans *key_num_of_vlans; key_num_of_vlans = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_NUM_OF_VLANS, target_container); key_num_of_vlans->num_of_vlans = 0; } proto_again: fdret = FLOW_DISSECT_RET_CONTINUE; switch (proto) { case htons(ETH_P_IP): { const struct iphdr *iph; struct iphdr _iph; iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); if (!iph || iph->ihl < 5) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } nhoff += iph->ihl * 4; ip_proto = iph->protocol; if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { key_addrs = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container); memcpy(&key_addrs->v4addrs.src, &iph->saddr, sizeof(key_addrs->v4addrs.src)); memcpy(&key_addrs->v4addrs.dst, &iph->daddr, sizeof(key_addrs->v4addrs.dst)); key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } __skb_flow_dissect_ipv4(skb, flow_dissector, target_container, data, iph); if (ip_is_fragment(iph)) { key_control->flags |= FLOW_DIS_IS_FRAGMENT; if (iph->frag_off & htons(IP_OFFSET)) { fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } else { key_control->flags |= FLOW_DIS_FIRST_FRAG; if (!(flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) { fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } } } break; } case htons(ETH_P_IPV6): { const struct ipv6hdr *iph; struct ipv6hdr _iph; iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); if (!iph) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } ip_proto = iph->nexthdr; nhoff += sizeof(struct ipv6hdr); if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { key_addrs = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_IPV6_ADDRS, target_container); memcpy(&key_addrs->v6addrs.src, &iph->saddr, sizeof(key_addrs->v6addrs.src)); memcpy(&key_addrs->v6addrs.dst, &iph->daddr, sizeof(key_addrs->v6addrs.dst)); key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } if 
((dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_FLOW_LABEL) || (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) && ip6_flowlabel(iph)) { __be32 flow_label = ip6_flowlabel(iph); if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_FLOW_LABEL)) { key_tags = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_FLOW_LABEL, target_container); key_tags->flow_label = ntohl(flow_label); } if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) { fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } } __skb_flow_dissect_ipv6(skb, flow_dissector, target_container, data, iph); break; } case htons(ETH_P_8021AD): case htons(ETH_P_8021Q): { const struct vlan_hdr *vlan = NULL; struct vlan_hdr _vlan; __be16 saved_vlan_tpid = proto; if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX && skb && skb_vlan_tag_present(skb)) { proto = skb->protocol; } else { vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan); if (!vlan) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } proto = vlan->h_vlan_encapsulated_proto; nhoff += sizeof(*vlan); } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_NUM_OF_VLANS) && !(key_control->flags & FLOW_DIS_ENCAPSULATION)) { struct flow_dissector_key_num_of_vlans *key_nvs; key_nvs = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_NUM_OF_VLANS, target_container); key_nvs->num_of_vlans++; } if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) { dissector_vlan = FLOW_DISSECTOR_KEY_VLAN; } else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) { dissector_vlan = FLOW_DISSECTOR_KEY_CVLAN; } else { fdret = FLOW_DISSECT_RET_PROTO_AGAIN; break; } if (dissector_uses_key(flow_dissector, dissector_vlan)) { key_vlan = skb_flow_dissector_target(flow_dissector, dissector_vlan, target_container); if (!vlan) { key_vlan->vlan_id = skb_vlan_tag_get_id(skb); key_vlan->vlan_priority = skb_vlan_tag_get_prio(skb); } else { key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) & VLAN_VID_MASK; key_vlan->vlan_priority = (ntohs(vlan->h_vlan_TCI) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } key_vlan->vlan_tpid = saved_vlan_tpid; key_vlan->vlan_eth_type = proto; } fdret = FLOW_DISSECT_RET_PROTO_AGAIN; break; } case htons(ETH_P_PPP_SES): { struct { struct pppoe_hdr hdr; __be16 proto; } *hdr, _hdr; u16 ppp_proto; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } if (!is_pppoe_ses_hdr_valid(&hdr->hdr)) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } /* least significant bit of the most significant octet * indicates if protocol field was compressed */ ppp_proto = ntohs(hdr->proto); if (ppp_proto & 0x0100) { ppp_proto = ppp_proto >> 8; nhoff += PPPOE_SES_HLEN - 1; } else { nhoff += PPPOE_SES_HLEN; } if (ppp_proto == PPP_IP) { proto = htons(ETH_P_IP); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; } else if (ppp_proto == PPP_IPV6) { proto = htons(ETH_P_IPV6); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; } else if (ppp_proto == PPP_MPLS_UC) { proto = htons(ETH_P_MPLS_UC); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; } else if (ppp_proto == PPP_MPLS_MC) { proto = htons(ETH_P_MPLS_MC); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; } else if (ppp_proto_is_valid(ppp_proto)) { fdret = FLOW_DISSECT_RET_OUT_GOOD; } else { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PPPOE)) { struct flow_dissector_key_pppoe *key_pppoe; key_pppoe = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PPPOE, target_container); key_pppoe->session_id = hdr->hdr.sid; key_pppoe->ppp_proto = htons(ppp_proto); key_pppoe->type = 
htons(ETH_P_PPP_SES); } break; } case htons(ETH_P_TIPC): { struct tipc_basic_hdr *hdr, _hdr; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_TIPC)) { key_addrs = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_TIPC, target_container); key_addrs->tipckey.key = tipc_hdr_rps_key(hdr); key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC; } fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } case htons(ETH_P_MPLS_UC): case htons(ETH_P_MPLS_MC): fdret = __skb_flow_dissect_mpls(skb, flow_dissector, target_container, data, nhoff, hlen, mpls_lse, &mpls_el); nhoff += sizeof(struct mpls_label); mpls_lse++; break; case htons(ETH_P_FCOE): if ((hlen - nhoff) < FCOE_HEADER_LEN) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } nhoff += FCOE_HEADER_LEN; fdret = FLOW_DISSECT_RET_OUT_GOOD; break; case htons(ETH_P_ARP): case htons(ETH_P_RARP): fdret = __skb_flow_dissect_arp(skb, flow_dissector, target_container, data, nhoff, hlen); break; case htons(ETH_P_BATMAN): fdret = __skb_flow_dissect_batadv(skb, key_control, data, &proto, &nhoff, hlen, flags); break; case htons(ETH_P_1588): { struct ptp_header *hdr, _hdr; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } nhoff += sizeof(struct ptp_header); fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } case htons(ETH_P_PRP): case htons(ETH_P_HSR): { struct hsr_tag *hdr, _hdr; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } proto = hdr->encap_proto; nhoff += HSR_HLEN; fdret = FLOW_DISSECT_RET_PROTO_AGAIN; break; } case htons(ETH_P_CFM): fdret = __skb_flow_dissect_cfm(skb, flow_dissector, target_container, data, nhoff, hlen); break; default: fdret = FLOW_DISSECT_RET_OUT_BAD; break; } /* Process result of proto processing */ switch (fdret) { case FLOW_DISSECT_RET_OUT_GOOD: goto out_good; case FLOW_DISSECT_RET_PROTO_AGAIN: if (skb_flow_dissect_allowed(&num_hdrs)) goto proto_again; goto out_good; case FLOW_DISSECT_RET_CONTINUE: case FLOW_DISSECT_RET_IPPROTO_AGAIN: break; case FLOW_DISSECT_RET_OUT_BAD: default: goto out_bad; } ip_proto_again: fdret = FLOW_DISSECT_RET_CONTINUE; switch (ip_proto) { case IPPROTO_GRE: if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) { fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector, target_container, data, &proto, &nhoff, &hlen, flags); break; case NEXTHDR_HOP: case NEXTHDR_ROUTING: case NEXTHDR_DEST: { u8 _opthdr[2], *opthdr; if (proto != htons(ETH_P_IPV6)) break; opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr), data, hlen, &_opthdr); if (!opthdr) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } ip_proto = opthdr[0]; nhoff += (opthdr[1] + 1) << 3; fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN; break; } case NEXTHDR_FRAGMENT: { struct frag_hdr _fh, *fh; if (proto != htons(ETH_P_IPV6)) break; fh = __skb_header_pointer(skb, nhoff, sizeof(_fh), data, hlen, &_fh); if (!fh) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } key_control->flags |= FLOW_DIS_IS_FRAGMENT; nhoff += sizeof(_fh); ip_proto = fh->nexthdr; if (!(fh->frag_off & htons(IP6_OFFSET))) { key_control->flags |= FLOW_DIS_FIRST_FRAG; if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) { fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN; break; } } fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } case IPPROTO_IPIP: if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) { fdret = 
FLOW_DISSECT_RET_OUT_GOOD; break; } proto = htons(ETH_P_IP); key_control->flags |= FLOW_DIS_ENCAPSULATION; if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) { fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } fdret = FLOW_DISSECT_RET_PROTO_AGAIN; break; case IPPROTO_IPV6: if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) { fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } proto = htons(ETH_P_IPV6); key_control->flags |= FLOW_DIS_ENCAPSULATION; if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) { fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } fdret = FLOW_DISSECT_RET_PROTO_AGAIN; break; case IPPROTO_MPLS: proto = htons(ETH_P_MPLS_UC); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; break; case IPPROTO_TCP: __skb_flow_dissect_tcp(skb, flow_dissector, target_container, data, nhoff, hlen); break; case IPPROTO_ICMP: case IPPROTO_ICMPV6: __skb_flow_dissect_icmp(skb, flow_dissector, target_container, data, nhoff, hlen); break; case IPPROTO_L2TP: __skb_flow_dissect_l2tpv3(skb, flow_dissector, target_container, data, nhoff, hlen); break; case IPPROTO_ESP: __skb_flow_dissect_esp(skb, flow_dissector, target_container, data, nhoff, hlen); break; case IPPROTO_AH: __skb_flow_dissect_ah(skb, flow_dissector, target_container, data, nhoff, hlen); break; default: break; } if (!(key_control->flags & FLOW_DIS_IS_FRAGMENT)) __skb_flow_dissect_ports(skb, flow_dissector, target_container, data, nhoff, ip_proto, hlen); /* Process result of IP proto processing */ switch (fdret) { case FLOW_DISSECT_RET_PROTO_AGAIN: if (skb_flow_dissect_allowed(&num_hdrs)) goto proto_again; break; case FLOW_DISSECT_RET_IPPROTO_AGAIN: if (skb_flow_dissect_allowed(&num_hdrs)) goto ip_proto_again; break; case FLOW_DISSECT_RET_OUT_GOOD: case FLOW_DISSECT_RET_CONTINUE: break; case FLOW_DISSECT_RET_OUT_BAD: default: goto out_bad; } out_good: ret = true; out: key_control->thoff = min_t(u16, nhoff, skb ? 
skb->len : hlen); key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; return ret; out_bad: ret = false; goto out; } EXPORT_SYMBOL(__skb_flow_dissect); static siphash_aligned_key_t hashrnd; static __always_inline void __flow_hash_secret_init(void) { net_get_random_once(&hashrnd, sizeof(hashrnd)); } static const void *flow_keys_hash_start(const struct flow_keys *flow) { BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT); return &flow->FLOW_KEYS_HASH_START_FIELD; } static inline size_t flow_keys_hash_length(const struct flow_keys *flow) { size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs); BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); switch (flow->control.addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: diff -= sizeof(flow->addrs.v4addrs); break; case FLOW_DISSECTOR_KEY_IPV6_ADDRS: diff -= sizeof(flow->addrs.v6addrs); break; case FLOW_DISSECTOR_KEY_TIPC: diff -= sizeof(flow->addrs.tipckey); break; } return sizeof(*flow) - diff; } __be32 flow_get_u32_src(const struct flow_keys *flow) { switch (flow->control.addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: return flow->addrs.v4addrs.src; case FLOW_DISSECTOR_KEY_IPV6_ADDRS: return (__force __be32)ipv6_addr_hash( &flow->addrs.v6addrs.src); case FLOW_DISSECTOR_KEY_TIPC: return flow->addrs.tipckey.key; default: return 0; } } EXPORT_SYMBOL(flow_get_u32_src); __be32 flow_get_u32_dst(const struct flow_keys *flow) { switch (flow->control.addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: return flow->addrs.v4addrs.dst; case FLOW_DISSECTOR_KEY_IPV6_ADDRS: return (__force __be32)ipv6_addr_hash( &flow->addrs.v6addrs.dst); default: return 0; } } EXPORT_SYMBOL(flow_get_u32_dst); /* Sort the source and destination IP and the ports, * to have consistent hash within the two directions */ static inline void __flow_hash_consistentify(struct flow_keys *keys) { int addr_diff, i; switch (keys->control.addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: if ((__force u32)keys->addrs.v4addrs.dst < (__force u32)keys->addrs.v4addrs.src) swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst); if ((__force u16)keys->ports.dst < (__force u16)keys->ports.src) { swap(keys->ports.src, keys->ports.dst); } break; case FLOW_DISSECTOR_KEY_IPV6_ADDRS: addr_diff = memcmp(&keys->addrs.v6addrs.dst, &keys->addrs.v6addrs.src, sizeof(keys->addrs.v6addrs.dst)); if (addr_diff < 0) { for (i = 0; i < 4; i++) swap(keys->addrs.v6addrs.src.s6_addr32[i], keys->addrs.v6addrs.dst.s6_addr32[i]); } if ((__force u16)keys->ports.dst < (__force u16)keys->ports.src) { swap(keys->ports.src, keys->ports.dst); } break; } } static inline u32 __flow_hash_from_keys(struct flow_keys *keys, const siphash_key_t *keyval) { u32 hash; __flow_hash_consistentify(keys); hash = siphash(flow_keys_hash_start(keys), flow_keys_hash_length(keys), keyval); if (!hash) hash = 1; return hash; } u32 flow_hash_from_keys(struct flow_keys *keys) { __flow_hash_secret_init(); return __flow_hash_from_keys(keys, &hashrnd); } EXPORT_SYMBOL(flow_hash_from_keys); u32 flow_hash_from_keys_seed(struct flow_keys *keys, const siphash_key_t *keyval) { return __flow_hash_from_keys(keys, keyval); } EXPORT_SYMBOL(flow_hash_from_keys_seed); static inline u32 ___skb_get_hash(const struct sk_buff *skb, struct flow_keys *keys, const siphash_key_t *keyval) { skb_flow_dissect_flow_keys(skb, keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); return __flow_hash_from_keys(keys, keyval); } struct _flow_keys_digest_data { __be16 n_proto; u8 ip_proto; u8 padding; __be32 ports; __be32 src; __be32 dst; }; void 
make_flow_keys_digest(struct flow_keys_digest *digest, const struct flow_keys *flow) { struct _flow_keys_digest_data *data = (struct _flow_keys_digest_data *)digest; BUILD_BUG_ON(sizeof(*data) > sizeof(*digest)); memset(digest, 0, sizeof(*digest)); data->n_proto = flow->basic.n_proto; data->ip_proto = flow->basic.ip_proto; data->ports = flow->ports.ports; data->src = flow->addrs.v4addrs.src; data->dst = flow->addrs.v4addrs.dst; } EXPORT_SYMBOL(make_flow_keys_digest); static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; u32 __skb_get_hash_symmetric_net(const struct net *net, const struct sk_buff *skb) { struct flow_keys keys; __flow_hash_secret_init(); memset(&keys, 0, sizeof(keys)); __skb_flow_dissect(net, skb, &flow_keys_dissector_symmetric, &keys, NULL, 0, 0, 0, 0); return __flow_hash_from_keys(&keys, &hashrnd); } EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric_net); /** * __skb_get_hash_net: calculate a flow hash * @net: associated network namespace, derived from @skb if NULL * @skb: sk_buff to calculate flow hash from * * This function calculates a flow hash based on src/dst addresses * and src/dst port numbers. Sets hash in skb to non-zero hash value * on success, zero indicates no valid hash. Also, sets l4_hash in skb * if hash is a canonical 4-tuple hash over transport ports. */ void __skb_get_hash_net(const struct net *net, struct sk_buff *skb) { struct flow_keys keys; u32 hash; memset(&keys, 0, sizeof(keys)); __skb_flow_dissect(net, skb, &flow_keys_dissector, &keys, NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); __flow_hash_secret_init(); hash = __flow_hash_from_keys(&keys, &hashrnd); __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } EXPORT_SYMBOL(__skb_get_hash_net); __u32 skb_get_hash_perturb(const struct sk_buff *skb, const siphash_key_t *perturb) { struct flow_keys keys; return ___skb_get_hash(skb, &keys, perturb); } EXPORT_SYMBOL(skb_get_hash_perturb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen) { u32 poff = keys->control.thoff; /* skip L4 headers for fragments after the first */ if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) && !(keys->control.flags & FLOW_DIS_FIRST_FRAG)) return poff; switch (keys->basic.ip_proto) { case IPPROTO_TCP: { /* access doff as u8 to avoid unaligned access */ const u8 *doff; u8 _doff; doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff), data, hlen, &_doff); if (!doff) return poff; poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2); break; } case IPPROTO_UDP: case IPPROTO_UDPLITE: poff += sizeof(struct udphdr); break; /* For the rest, we do not really care about header * extensions at this point for now. */ case IPPROTO_ICMP: poff += sizeof(struct icmphdr); break; case IPPROTO_ICMPV6: poff += sizeof(struct icmp6hdr); break; case IPPROTO_IGMP: poff += sizeof(struct igmphdr); break; case IPPROTO_DCCP: poff += sizeof(struct dccp_hdr); break; case IPPROTO_SCTP: poff += sizeof(struct sctphdr); break; } return poff; } /** * skb_get_poff - get the offset to the payload * @skb: sk_buff to get the payload offset from * * The function will get the offset to the payload as far as it could * be dissected. The main user is currently BPF, so that we can dynamically * truncate packets without needing to push actual payload to the user * space and can analyze headers only, instead. 
*/ u32 skb_get_poff(const struct sk_buff *skb) { struct flow_keys_basic keys; if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, NULL, 0, 0, 0, 0)) return 0; return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); } __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys) { memset(keys, 0, sizeof(*keys)); memcpy(&keys->addrs.v6addrs.src, &fl6->saddr, sizeof(keys->addrs.v6addrs.src)); memcpy(&keys->addrs.v6addrs.dst, &fl6->daddr, sizeof(keys->addrs.v6addrs.dst)); keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; keys->ports.src = fl6->fl6_sport; keys->ports.dst = fl6->fl6_dport; keys->keyid.keyid = fl6->fl6_gre_key; keys->tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); keys->basic.ip_proto = fl6->flowi6_proto; return flow_hash_from_keys(keys); } EXPORT_SYMBOL(__get_hash_from_flowi6); static const struct flow_dissector_key flow_keys_dissector_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, .offset = offsetof(struct flow_keys, control), }, { .key_id = FLOW_DISSECTOR_KEY_BASIC, .offset = offsetof(struct flow_keys, basic), }, { .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, .offset = offsetof(struct flow_keys, addrs.v4addrs), }, { .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, .offset = offsetof(struct flow_keys, addrs.v6addrs), }, { .key_id = FLOW_DISSECTOR_KEY_TIPC, .offset = offsetof(struct flow_keys, addrs.tipckey), }, { .key_id = FLOW_DISSECTOR_KEY_PORTS, .offset = offsetof(struct flow_keys, ports), }, { .key_id = FLOW_DISSECTOR_KEY_VLAN, .offset = offsetof(struct flow_keys, vlan), }, { .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL, .offset = offsetof(struct flow_keys, tags), }, { .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID, .offset = offsetof(struct flow_keys, keyid), }, }; static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, .offset = offsetof(struct flow_keys, control), }, { .key_id = FLOW_DISSECTOR_KEY_BASIC, .offset = offsetof(struct flow_keys, basic), }, { .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, .offset = offsetof(struct flow_keys, addrs.v4addrs), }, { .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, .offset = offsetof(struct flow_keys, addrs.v6addrs), }, { .key_id = FLOW_DISSECTOR_KEY_PORTS, .offset = offsetof(struct flow_keys, ports), }, }; static const struct flow_dissector_key flow_keys_basic_dissector_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, .offset = offsetof(struct flow_keys, control), }, { .key_id = FLOW_DISSECTOR_KEY_BASIC, .offset = offsetof(struct flow_keys, basic), }, }; struct flow_dissector flow_keys_dissector __read_mostly; EXPORT_SYMBOL(flow_keys_dissector); struct flow_dissector flow_keys_basic_dissector __read_mostly; EXPORT_SYMBOL(flow_keys_basic_dissector); static int __init init_default_flow_dissectors(void) { skb_flow_dissector_init(&flow_keys_dissector, flow_keys_dissector_keys, ARRAY_SIZE(flow_keys_dissector_keys)); skb_flow_dissector_init(&flow_keys_dissector_symmetric, flow_keys_dissector_symmetric_keys, ARRAY_SIZE(flow_keys_dissector_symmetric_keys)); skb_flow_dissector_init(&flow_keys_basic_dissector, flow_keys_basic_dissector_keys, ARRAY_SIZE(flow_keys_basic_dissector_keys)); return 0; } core_initcall(init_default_flow_dissectors);
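To make the exported flow-dissector helpers above concrete, here is a minimal, illustrative usage sketch; it is not part of the coverage-listed sources. It combines the skb_flow_dissect_flow_keys() wrapper from <linux/skbuff.h> with the flow_hash_from_keys() helper exported above, mirroring what ___skb_get_hash() does internally; the function name example_skb_flow_hash() is hypothetical.

/*
 * Illustrative sketch: derive an L4 flow hash for an skb using the
 * default flow_keys_dissector (via skb_flow_dissect_flow_keys()) and
 * the SipHash-based flow_hash_from_keys() shown above.
 */
#include <linux/skbuff.h>
#include <net/flow_dissector.h>

static u32 example_skb_flow_hash(const struct sk_buff *skb)
{
	struct flow_keys keys;

	/* Dissect addresses, ports and protocol; stop early when an IPv6
	 * flow label is found, as ___skb_get_hash() does.
	 */
	if (!skb_flow_dissect_flow_keys(skb, &keys,
					FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL))
		return 0;

	/* SipHash over the dissected keys; never returns 0 on success. */
	return flow_hash_from_keys(&keys);
}

Note that flow_hash_from_keys() first sorts source/destination addresses and ports (__flow_hash_consistentify() above), so both directions of the same connection map to the same hash value.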
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2005-2006 Micronas USA Inc. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/wait.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/usb.h> #include <linux/i2c.h> #include <asm/byteorder.h> #include <media/i2c/saa7115.h> #include <media/tuner.h> #include <media/i2c/uda1342.h> #include "go7007-priv.h" static unsigned int assume_endura; module_param(assume_endura, int, 0644); MODULE_PARM_DESC(assume_endura, "when probing fails, hardware is a Pelco Endura"); /* #define GO7007_I2C_DEBUG */ /* for debugging the EZ-USB I2C adapter */ #define HPI_STATUS_ADDR 0xFFF4 #define INT_PARAM_ADDR 0xFFF6 #define INT_INDEX_ADDR 0xFFF8 /* * Pipes on EZ-USB interface: * 0 snd - Control * 0 rcv - Control * 2 snd - Download firmware (control) * 4 rcv - Read Interrupt (interrupt) * 6 rcv - Read Video (bulk) * 8 rcv - Read Audio (bulk) */ #define GO7007_USB_EZUSB (1<<0) #define GO7007_USB_EZUSB_I2C (1<<1) struct go7007_usb_board { unsigned int flags; struct go7007_board_info main_info; }; struct go7007_usb { const struct go7007_usb_board *board; struct mutex i2c_lock; struct usb_device *usbdev; struct urb *video_urbs[8]; struct urb *audio_urbs[8]; struct urb *intr_urb; }; /*********************** Product specification data ***********************/ static const struct go7007_usb_board board_matrix_ii = { .flags = GO7007_USB_EZUSB, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO | GO7007_BOARD_USE_ONBOARD_I2C, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_WORD_16, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_VALID_ENABLE | GO7007_SENSOR_TV | GO7007_SENSOR_SAA7115 | GO7007_SENSOR_VBI | GO7007_SENSOR_SCALING, .num_i2c_devs = 1, .i2c_devs = { { .type = "saa7115", .addr = 0x20, .is_video = 1, }, }, .num_inputs = 2, .inputs = { { .video_input = 0, .name = "Composite", }, { .video_input = 9, .name = "S-Video", }, }, .video_config = SAA7115_IDQ_IS_DEFAULT, }, }; static const struct go7007_usb_board board_matrix_reload = { .flags = GO7007_USB_EZUSB, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO | GO7007_BOARD_USE_ONBOARD_I2C, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_WORD_16, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_TV, .num_i2c_devs = 1, .i2c_devs = { { .type = "saa7113", .addr = 0x25, .is_video = 1, }, }, .num_inputs = 2, .inputs = { { .video_input = 0, .name = "Composite", }, { .video_input = 9, .name = "S-Video", }, }, .video_config = SAA7115_IDQ_IS_DEFAULT, }, }; static const struct go7007_usb_board board_star_trek = { .flags = GO7007_USB_EZUSB | GO7007_USB_EZUSB_I2C, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO, /* | GO7007_BOARD_HAS_TUNER, */ .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_VALID_ENABLE | GO7007_SENSOR_TV | GO7007_SENSOR_SAA7115 | GO7007_SENSOR_VBI | GO7007_SENSOR_SCALING, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_WORD_16, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .num_i2c_devs = 1, .i2c_devs = { { .type = "saa7115", .addr = 0x20, .is_video = 1, }, }, .num_inputs = 2, .inputs = { /* { * .video_input = 3, * .audio_index = AUDIO_TUNER, * .name = "Tuner", * }, */ { .video_input = 1, /* .audio_index = AUDIO_EXTERN, */ .name = "Composite", }, { .video_input = 8, /* .audio_index = AUDIO_EXTERN, */ .name = "S-Video", }, }, .video_config = 
SAA7115_IDQ_IS_DEFAULT, }, }; static const struct go7007_usb_board board_px_tv402u = { .flags = GO7007_USB_EZUSB | GO7007_USB_EZUSB_I2C, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO | GO7007_BOARD_HAS_TUNER, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_VALID_ENABLE | GO7007_SENSOR_TV | GO7007_SENSOR_SAA7115 | GO7007_SENSOR_VBI | GO7007_SENSOR_SCALING, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_WORD_16, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .num_i2c_devs = 5, .i2c_devs = { { .type = "saa7115", .addr = 0x20, .is_video = 1, }, { .type = "uda1342", .addr = 0x1a, .is_audio = 1, }, { .type = "tuner", .addr = 0x60, }, { .type = "tuner", .addr = 0x43, }, { .type = "sony-btf-mpx", .addr = 0x44, }, }, .num_inputs = 3, .inputs = { { .video_input = 3, .audio_index = 0, .name = "Tuner", }, { .video_input = 1, .audio_index = 1, .name = "Composite", }, { .video_input = 8, .audio_index = 1, .name = "S-Video", }, }, .video_config = SAA7115_IDQ_IS_DEFAULT, .num_aud_inputs = 2, .aud_inputs = { { .audio_input = UDA1342_IN2, .name = "Tuner", }, { .audio_input = UDA1342_IN1, .name = "Line In", }, }, }, }; static const struct go7007_usb_board board_xmen = { .flags = 0, .main_info = { .flags = GO7007_BOARD_USE_ONBOARD_I2C, .hpi_buffer_cap = 0, .sensor_flags = GO7007_SENSOR_VREF_POLAR, .sensor_width = 320, .sensor_height = 240, .sensor_framerate = 30030, .audio_flags = GO7007_AUDIO_ONE_CHANNEL | GO7007_AUDIO_I2S_MODE_3 | GO7007_AUDIO_WORD_14 | GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_BCLK_POLAR | GO7007_AUDIO_OKI_MODE, .audio_rate = 8000, .audio_bclk_div = 48, .audio_main_div = 1, .num_i2c_devs = 1, .i2c_devs = { { .type = "ov7640", .addr = 0x21, }, }, .num_inputs = 1, .inputs = { { .name = "Camera", }, }, }, }; static const struct go7007_usb_board board_matrix_revolution = { .flags = GO7007_USB_EZUSB, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO | GO7007_BOARD_USE_ONBOARD_I2C, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_WORD_16, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_TV | GO7007_SENSOR_VBI, .num_i2c_devs = 1, .i2c_devs = { { .type = "tw9903", .is_video = 1, .addr = 0x44, }, }, .num_inputs = 2, .inputs = { { .video_input = 2, .name = "Composite", }, { .video_input = 8, .name = "S-Video", }, }, }, }; #if 0 static const struct go7007_usb_board board_lifeview_lr192 = { .flags = GO7007_USB_EZUSB, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO | GO7007_BOARD_USE_ONBOARD_I2C, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_WORD_16, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_VALID_ENABLE | GO7007_SENSOR_TV | GO7007_SENSOR_VBI | GO7007_SENSOR_SCALING, .num_i2c_devs = 0, .num_inputs = 1, .inputs = { { .video_input = 0, .name = "Composite", }, }, }, }; #endif static const struct go7007_usb_board board_endura = { .flags = 0, .main_info = { .flags = 0, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_WORD_16, .audio_rate = 8000, .audio_bclk_div = 48, .audio_main_div = 8, .hpi_buffer_cap = 0, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_TV, .sensor_h_offset = 8, .num_i2c_devs = 0, .num_inputs = 1, .inputs = { { .name = "Camera", }, }, }, }; static const struct go7007_usb_board board_adlink_mpg24 = { .flags = 0, .main_info = { .flags = GO7007_BOARD_USE_ONBOARD_I2C, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | 
GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_WORD_16, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 0, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_TV | GO7007_SENSOR_VBI, .num_i2c_devs = 1, .i2c_devs = { { .type = "tw2804", .addr = 0x00, /* yes, really */ .flags = I2C_CLIENT_TEN, .is_video = 1, }, }, .num_inputs = 1, .inputs = { { .name = "Composite", }, }, }, }; static const struct go7007_usb_board board_sensoray_2250 = { .flags = GO7007_USB_EZUSB | GO7007_USB_EZUSB_I2C, .main_info = { .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_WORD_16, .flags = GO7007_BOARD_HAS_AUDIO, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_TV, .num_i2c_devs = 1, .i2c_devs = { { .type = "s2250", .addr = 0x43, .is_video = 1, .is_audio = 1, }, }, .num_inputs = 2, .inputs = { { .video_input = 0, .name = "Composite", }, { .video_input = 1, .name = "S-Video", }, }, .num_aud_inputs = 3, .aud_inputs = { { .audio_input = 0, .name = "Line In", }, { .audio_input = 1, .name = "Mic", }, { .audio_input = 2, .name = "Mic Boost", }, }, }, }; static const struct go7007_usb_board board_ads_usbav_709 = { .flags = GO7007_USB_EZUSB, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO | GO7007_BOARD_USE_ONBOARD_I2C, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_WORD_16, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_TV | GO7007_SENSOR_VBI, .num_i2c_devs = 1, .i2c_devs = { { .type = "tw9906", .is_video = 1, .addr = 0x44, }, }, .num_inputs = 2, .inputs = { { .video_input = 0, .name = "Composite", }, { .video_input = 10, .name = "S-Video", }, }, }, }; static const struct usb_device_id go7007_usb_id_table[] = { { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x200, /* Revision number of XMen */ .bcdDevice_hi = 0x200, .bInterfaceClass = 255, .bInterfaceSubClass = 0, .bInterfaceProtocol = 255, .driver_info = (kernel_ulong_t)GO7007_BOARDID_XMEN, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x202, /* Revision number of Matrix II */ .bcdDevice_hi = 0x202, .driver_info = (kernel_ulong_t)GO7007_BOARDID_MATRIX_II, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x204, /* Revision number of Matrix */ .bcdDevice_hi = 0x204, /* Reloaded */ .driver_info = (kernel_ulong_t)GO7007_BOARDID_MATRIX_RELOAD, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x205, /* Revision number of XMen-II */ .bcdDevice_hi = 0x205, .bInterfaceClass = 255, .bInterfaceSubClass = 0, .bInterfaceProtocol = 255, .driver_info = (kernel_ulong_t)GO7007_BOARDID_XMEN_II, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x208, /* Revision number of Star Trek */ 
.bcdDevice_hi = 0x208, .driver_info = (kernel_ulong_t)GO7007_BOARDID_STAR_TREK, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x209, /* Revision number of XMen-III */ .bcdDevice_hi = 0x209, .bInterfaceClass = 255, .bInterfaceSubClass = 0, .bInterfaceProtocol = 255, .driver_info = (kernel_ulong_t)GO7007_BOARDID_XMEN_III, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x210, /* Revision number of Matrix */ .bcdDevice_hi = 0x210, /* Revolution */ .driver_info = (kernel_ulong_t)GO7007_BOARDID_MATRIX_REV, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x093b, /* Vendor ID of Plextor */ .idProduct = 0xa102, /* Product ID of M402U */ .bcdDevice_lo = 0x1, /* revision number of Blueberry */ .bcdDevice_hi = 0x1, .driver_info = (kernel_ulong_t)GO7007_BOARDID_PX_M402U, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x093b, /* Vendor ID of Plextor */ .idProduct = 0xa104, /* Product ID of TV402U */ .bcdDevice_lo = 0x1, .bcdDevice_hi = 0x1, .driver_info = (kernel_ulong_t)GO7007_BOARDID_PX_TV402U, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x10fd, /* Vendor ID of Anubis Electronics */ .idProduct = 0xde00, /* Product ID of Lifeview LR192 */ .bcdDevice_lo = 0x1, .bcdDevice_hi = 0x1, .driver_info = (kernel_ulong_t)GO7007_BOARDID_LIFEVIEW_LR192, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x1943, /* Vendor ID Sensoray */ .idProduct = 0x2250, /* Product ID of 2250/2251 */ .bcdDevice_lo = 0x1, .bcdDevice_hi = 0x1, .driver_info = (kernel_ulong_t)GO7007_BOARDID_SENSORAY_2250, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x06e1, /* Vendor ID of ADS Technologies */ .idProduct = 0x0709, /* Product ID of DVD Xpress DX2 */ .bcdDevice_lo = 0x204, .bcdDevice_hi = 0x204, .driver_info = (kernel_ulong_t)GO7007_BOARDID_ADS_USBAV_709, }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, go7007_usb_id_table); /********************* Driver for EZ-USB HPI interface *********************/ static int go7007_usb_vendor_request(struct go7007 *go, int request, int value, int index, void *transfer_buffer, int length, int in) { struct go7007_usb *usb = go->hpi_context; int timeout = 5000; if (in) { return usb_control_msg(usb->usbdev, usb_rcvctrlpipe(usb->usbdev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, value, index, transfer_buffer, length, timeout); } else { return usb_control_msg(usb->usbdev, usb_sndctrlpipe(usb->usbdev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, transfer_buffer, length, timeout); } } static int go7007_usb_interface_reset(struct go7007 *go) { struct go7007_usb *usb = go->hpi_context; u16 intr_val, intr_data; if (go->status == STATUS_SHUTDOWN) return -1; /* Reset encoder */ if (go7007_write_interrupt(go, 0x0001, 0x0001) < 0) return -1; msleep(100); if (usb->board->flags & GO7007_USB_EZUSB) { /* Reset buffer in EZ-USB */ pr_debug("resetting EZ-USB buffers\n"); if (go7007_usb_vendor_request(go, 0x10, 0, 0, NULL, 0, 0) < 0 || go7007_usb_vendor_request(go, 0x10, 0, 0, NULL, 0, 0) < 0) return -1; /* Reset encoder again */ if (go7007_write_interrupt(go, 0x0001, 0x0001) < 0) return -1; msleep(100); } /* Wait for an interrupt to indicate 
successful hardware reset */ if (go7007_read_interrupt(go, &intr_val, &intr_data) < 0 || (intr_val & ~0x1) != 0x55aa) { dev_err(go->dev, "unable to reset the USB interface\n"); return -1; } return 0; } static int go7007_usb_ezusb_write_interrupt(struct go7007 *go, int addr, int data) { struct go7007_usb *usb = go->hpi_context; int i, r; u16 status_reg = 0; int timeout = 500; pr_debug("WriteInterrupt: %04x %04x\n", addr, data); for (i = 0; i < 100; ++i) { r = usb_control_msg(usb->usbdev, usb_rcvctrlpipe(usb->usbdev, 0), 0x14, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0, HPI_STATUS_ADDR, go->usb_buf, sizeof(status_reg), timeout); if (r < 0) break; status_reg = le16_to_cpu(*((__le16 *)go->usb_buf)); if (!(status_reg & 0x0010)) break; msleep(10); } if (r < 0) goto write_int_error; if (i == 100) { dev_err(go->dev, "device is hung, status reg = 0x%04x\n", status_reg); return -1; } r = usb_control_msg(usb->usbdev, usb_sndctrlpipe(usb->usbdev, 0), 0x12, USB_TYPE_VENDOR | USB_RECIP_DEVICE, data, INT_PARAM_ADDR, NULL, 0, timeout); if (r < 0) goto write_int_error; r = usb_control_msg(usb->usbdev, usb_sndctrlpipe(usb->usbdev, 0), 0x12, USB_TYPE_VENDOR | USB_RECIP_DEVICE, addr, INT_INDEX_ADDR, NULL, 0, timeout); if (r < 0) goto write_int_error; return 0; write_int_error: dev_err(go->dev, "error in WriteInterrupt: %d\n", r); return r; } static int go7007_usb_onboard_write_interrupt(struct go7007 *go, int addr, int data) { struct go7007_usb *usb = go->hpi_context; int r; int timeout = 500; pr_debug("WriteInterrupt: %04x %04x\n", addr, data); go->usb_buf[0] = data & 0xff; go->usb_buf[1] = data >> 8; go->usb_buf[2] = addr & 0xff; go->usb_buf[3] = addr >> 8; go->usb_buf[4] = go->usb_buf[5] = go->usb_buf[6] = go->usb_buf[7] = 0; r = usb_control_msg(usb->usbdev, usb_sndctrlpipe(usb->usbdev, 2), 0x00, USB_TYPE_VENDOR | USB_RECIP_ENDPOINT, 0x55aa, 0xf0f0, go->usb_buf, 8, timeout); if (r < 0) { dev_err(go->dev, "error in WriteInterrupt: %d\n", r); return r; } return 0; } static void go7007_usb_readinterrupt_complete(struct urb *urb) { struct go7007 *go = (struct go7007 *)urb->context; __le16 *regs = (__le16 *)urb->transfer_buffer; int status = urb->status; if (status) { if (status != -ESHUTDOWN && go->status != STATUS_SHUTDOWN) { dev_err(go->dev, "error in read interrupt: %d\n", urb->status); } else { wake_up(&go->interrupt_waitq); return; } } else if (urb->actual_length != urb->transfer_buffer_length) { dev_err(go->dev, "short read in interrupt pipe!\n"); } else { go->interrupt_available = 1; go->interrupt_data = __le16_to_cpu(regs[0]); go->interrupt_value = __le16_to_cpu(regs[1]); pr_debug("ReadInterrupt: %04x %04x\n", go->interrupt_value, go->interrupt_data); } wake_up(&go->interrupt_waitq); } static int go7007_usb_read_interrupt(struct go7007 *go) { struct go7007_usb *usb = go->hpi_context; int r; r = usb_submit_urb(usb->intr_urb, GFP_KERNEL); if (r < 0) { dev_err(go->dev, "unable to submit interrupt urb: %d\n", r); return r; } return 0; } static void go7007_usb_read_video_pipe_complete(struct urb *urb) { struct go7007 *go = (struct go7007 *)urb->context; int r, status = urb->status; if (!vb2_is_streaming(&go->vidq)) { wake_up_interruptible(&go->frame_waitq); return; } if (status) { dev_err(go->dev, "error in video pipe: %d\n", status); return; } if (urb->actual_length != urb->transfer_buffer_length) { dev_err(go->dev, "short read in video pipe!\n"); return; } go7007_parse_video_stream(go, urb->transfer_buffer, urb->actual_length); r = usb_submit_urb(urb, GFP_ATOMIC); if (r < 0) dev_err(go->dev, 
"error in video pipe: %d\n", r); } static void go7007_usb_read_audio_pipe_complete(struct urb *urb) { struct go7007 *go = (struct go7007 *)urb->context; int r, status = urb->status; if (!vb2_is_streaming(&go->vidq)) return; if (status) { dev_err(go->dev, "error in audio pipe: %d\n", status); return; } if (urb->actual_length != urb->transfer_buffer_length) { dev_err(go->dev, "short read in audio pipe!\n"); return; } if (go->audio_deliver != NULL) go->audio_deliver(go, urb->transfer_buffer, urb->actual_length); r = usb_submit_urb(urb, GFP_ATOMIC); if (r < 0) dev_err(go->dev, "error in audio pipe: %d\n", r); } static int go7007_usb_stream_start(struct go7007 *go) { struct go7007_usb *usb = go->hpi_context; int i, r; for (i = 0; i < 8; ++i) { r = usb_submit_urb(usb->video_urbs[i], GFP_KERNEL); if (r < 0) { dev_err(go->dev, "error submitting video urb %d: %d\n", i, r); goto video_submit_failed; } } if (!go->audio_enabled) return 0; for (i = 0; i < 8; ++i) { r = usb_submit_urb(usb->audio_urbs[i], GFP_KERNEL); if (r < 0) { dev_err(go->dev, "error submitting audio urb %d: %d\n", i, r); goto audio_submit_failed; } } return 0; audio_submit_failed: for (i = 0; i < 7; ++i) usb_kill_urb(usb->audio_urbs[i]); video_submit_failed: for (i = 0; i < 8; ++i) usb_kill_urb(usb->video_urbs[i]); return -1; } static int go7007_usb_stream_stop(struct go7007 *go) { struct go7007_usb *usb = go->hpi_context; int i; if (go->status == STATUS_SHUTDOWN) return 0; for (i = 0; i < 8; ++i) usb_kill_urb(usb->video_urbs[i]); if (go->audio_enabled) for (i = 0; i < 8; ++i) usb_kill_urb(usb->audio_urbs[i]); return 0; } static int go7007_usb_send_firmware(struct go7007 *go, u8 *data, int len) { struct go7007_usb *usb = go->hpi_context; int transferred, pipe; int timeout = 500; pr_debug("DownloadBuffer sending %d bytes\n", len); if (usb->board->flags & GO7007_USB_EZUSB) pipe = usb_sndbulkpipe(usb->usbdev, 2); else pipe = usb_sndbulkpipe(usb->usbdev, 3); return usb_bulk_msg(usb->usbdev, pipe, data, len, &transferred, timeout); } static void go7007_usb_release(struct go7007 *go) { struct go7007_usb *usb = go->hpi_context; struct urb *vurb, *aurb; int i; if (usb->intr_urb) { usb_kill_urb(usb->intr_urb); kfree(usb->intr_urb->transfer_buffer); usb_free_urb(usb->intr_urb); } /* Free USB-related structs */ for (i = 0; i < 8; ++i) { vurb = usb->video_urbs[i]; if (vurb) { usb_kill_urb(vurb); kfree(vurb->transfer_buffer); usb_free_urb(vurb); } aurb = usb->audio_urbs[i]; if (aurb) { usb_kill_urb(aurb); kfree(aurb->transfer_buffer); usb_free_urb(aurb); } } kfree(go->hpi_context); } static const struct go7007_hpi_ops go7007_usb_ezusb_hpi_ops = { .interface_reset = go7007_usb_interface_reset, .write_interrupt = go7007_usb_ezusb_write_interrupt, .read_interrupt = go7007_usb_read_interrupt, .stream_start = go7007_usb_stream_start, .stream_stop = go7007_usb_stream_stop, .send_firmware = go7007_usb_send_firmware, .release = go7007_usb_release, }; static const struct go7007_hpi_ops go7007_usb_onboard_hpi_ops = { .interface_reset = go7007_usb_interface_reset, .write_interrupt = go7007_usb_onboard_write_interrupt, .read_interrupt = go7007_usb_read_interrupt, .stream_start = go7007_usb_stream_start, .stream_stop = go7007_usb_stream_stop, .send_firmware = go7007_usb_send_firmware, .release = go7007_usb_release, }; /********************* Driver for EZ-USB I2C adapter *********************/ static int go7007_usb_i2c_master_xfer(struct i2c_adapter *adapter, struct i2c_msg msgs[], int num) { struct go7007 *go = i2c_get_adapdata(adapter); struct go7007_usb 
*usb = go->hpi_context; u8 *buf = go->usb_buf; int buf_len, i; int ret = -EIO; if (go->status == STATUS_SHUTDOWN) return -ENODEV; mutex_lock(&usb->i2c_lock); for (i = 0; i < num; ++i) { /* The hardware command is "write some bytes then read some * bytes", so we try to coalesce a write followed by a read * into a single USB transaction */ if (i + 1 < num && msgs[i].addr == msgs[i + 1].addr && !(msgs[i].flags & I2C_M_RD) && (msgs[i + 1].flags & I2C_M_RD)) { #ifdef GO7007_I2C_DEBUG pr_debug("i2c write/read %d/%d bytes on %02x\n", msgs[i].len, msgs[i + 1].len, msgs[i].addr); #endif buf[0] = 0x01; buf[1] = msgs[i].len + 1; buf[2] = msgs[i].addr << 1; memcpy(&buf[3], msgs[i].buf, msgs[i].len); buf_len = msgs[i].len + 3; buf[buf_len++] = msgs[++i].len; } else if (msgs[i].flags & I2C_M_RD) { #ifdef GO7007_I2C_DEBUG pr_debug("i2c read %d bytes on %02x\n", msgs[i].len, msgs[i].addr); #endif buf[0] = 0x01; buf[1] = 1; buf[2] = msgs[i].addr << 1; buf[3] = msgs[i].len; buf_len = 4; } else { #ifdef GO7007_I2C_DEBUG pr_debug("i2c write %d bytes on %02x\n", msgs[i].len, msgs[i].addr); #endif buf[0] = 0x00; buf[1] = msgs[i].len + 1; buf[2] = msgs[i].addr << 1; memcpy(&buf[3], msgs[i].buf, msgs[i].len); buf_len = msgs[i].len + 3; buf[buf_len++] = 0; } if (go7007_usb_vendor_request(go, 0x24, 0, 0, buf, buf_len, 0) < 0) goto i2c_done; if (msgs[i].flags & I2C_M_RD) { memset(buf, 0, msgs[i].len + 1); if (go7007_usb_vendor_request(go, 0x25, 0, 0, buf, msgs[i].len + 1, 1) < 0) goto i2c_done; memcpy(msgs[i].buf, buf + 1, msgs[i].len); } } ret = num; i2c_done: mutex_unlock(&usb->i2c_lock); return ret; } static u32 go7007_usb_functionality(struct i2c_adapter *adapter) { /* No errors are reported by the hardware, so we don't bother * supporting quick writes to avoid confusing probing */ return (I2C_FUNC_SMBUS_EMUL) & ~I2C_FUNC_SMBUS_QUICK; } static const struct i2c_algorithm go7007_usb_algo = { .master_xfer = go7007_usb_i2c_master_xfer, .functionality = go7007_usb_functionality, }; static struct i2c_adapter go7007_usb_adap_templ = { .owner = THIS_MODULE, .name = "WIS GO7007SB EZ-USB", .algo = &go7007_usb_algo, }; /********************* USB add/remove functions *********************/ static int go7007_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct go7007 *go; struct go7007_usb *usb; const struct go7007_usb_board *board; struct usb_device *usbdev = interface_to_usbdev(intf); struct usb_host_endpoint *ep; unsigned num_i2c_devs; char *name; int video_pipe, i, v_urb_len; pr_debug("probing new GO7007 USB board\n"); switch (id->driver_info) { case GO7007_BOARDID_MATRIX_II: name = "WIS Matrix II or compatible"; board = &board_matrix_ii; break; case GO7007_BOARDID_MATRIX_RELOAD: name = "WIS Matrix Reloaded or compatible"; board = &board_matrix_reload; break; case GO7007_BOARDID_MATRIX_REV: name = "WIS Matrix Revolution or compatible"; board = &board_matrix_revolution; break; case GO7007_BOARDID_STAR_TREK: name = "WIS Star Trek or compatible"; board = &board_star_trek; break; case GO7007_BOARDID_XMEN: name = "WIS XMen or compatible"; board = &board_xmen; break; case GO7007_BOARDID_XMEN_II: name = "WIS XMen II or compatible"; board = &board_xmen; break; case GO7007_BOARDID_XMEN_III: name = "WIS XMen III or compatible"; board = &board_xmen; break; case GO7007_BOARDID_PX_M402U: name = "Plextor PX-M402U"; board = &board_matrix_ii; break; case GO7007_BOARDID_PX_TV402U: name = "Plextor PX-TV402U (unknown tuner)"; board = &board_px_tv402u; break; case GO7007_BOARDID_LIFEVIEW_LR192: dev_err(&intf->dev, 
"The Lifeview TV Walker Ultra is not supported. Sorry!\n"); return -ENODEV; #if 0 name = "Lifeview TV Walker Ultra"; board = &board_lifeview_lr192; #endif break; case GO7007_BOARDID_SENSORAY_2250: dev_info(&intf->dev, "Sensoray 2250 found\n"); name = "Sensoray 2250/2251"; board = &board_sensoray_2250; break; case GO7007_BOARDID_ADS_USBAV_709: name = "ADS Tech DVD Xpress DX2"; board = &board_ads_usbav_709; break; default: dev_err(&intf->dev, "unknown board ID %d!\n", (unsigned int)id->driver_info); return -ENODEV; } go = go7007_alloc(&board->main_info, &intf->dev); if (go == NULL) return -ENOMEM; usb = kzalloc(sizeof(struct go7007_usb), GFP_KERNEL); if (usb == NULL) { kfree(go); return -ENOMEM; } usb->board = board; usb->usbdev = usbdev; usb_make_path(usbdev, go->bus_info, sizeof(go->bus_info)); go->board_id = id->driver_info; strscpy(go->name, name, sizeof(go->name)); if (board->flags & GO7007_USB_EZUSB) go->hpi_ops = &go7007_usb_ezusb_hpi_ops; else go->hpi_ops = &go7007_usb_onboard_hpi_ops; go->hpi_context = usb; ep = usb->usbdev->ep_in[4]; if (!ep) goto allocfail; /* Allocate the URB and buffer for receiving incoming interrupts */ usb->intr_urb = usb_alloc_urb(0, GFP_KERNEL); if (usb->intr_urb == NULL) goto allocfail; usb->intr_urb->transfer_buffer = kmalloc_array(2, sizeof(u16), GFP_KERNEL); if (usb->intr_urb->transfer_buffer == NULL) goto allocfail; if (usb_endpoint_type(&ep->desc) == USB_ENDPOINT_XFER_BULK) usb_fill_bulk_urb(usb->intr_urb, usb->usbdev, usb_rcvbulkpipe(usb->usbdev, 4), usb->intr_urb->transfer_buffer, 2*sizeof(u16), go7007_usb_readinterrupt_complete, go); else usb_fill_int_urb(usb->intr_urb, usb->usbdev, usb_rcvintpipe(usb->usbdev, 4), usb->intr_urb->transfer_buffer, 2*sizeof(u16), go7007_usb_readinterrupt_complete, go, 8); usb_set_intfdata(intf, &go->v4l2_dev); /* Boot the GO7007 */ if (go7007_boot_encoder(go, go->board_info->flags & GO7007_BOARD_USE_ONBOARD_I2C) < 0) goto allocfail; /* Register the EZ-USB I2C adapter, if we're using it */ if (board->flags & GO7007_USB_EZUSB_I2C) { memcpy(&go->i2c_adapter, &go7007_usb_adap_templ, sizeof(go7007_usb_adap_templ)); mutex_init(&usb->i2c_lock); go->i2c_adapter.dev.parent = go->dev; i2c_set_adapdata(&go->i2c_adapter, go); if (i2c_add_adapter(&go->i2c_adapter) < 0) { dev_err(go->dev, "error: i2c_add_adapter failed\n"); goto allocfail; } go->i2c_adapter_online = 1; } /* Pelco and Adlink reused the XMen and XMen-III vendor and product * IDs for their own incompatible designs. We can detect XMen boards * by probing the sensor, but there is no way to probe the sensors on * the Pelco and Adlink designs so we default to the Adlink. If it * is actually a Pelco, the user must set the assume_endura module * parameter. 
*/ if ((go->board_id == GO7007_BOARDID_XMEN || go->board_id == GO7007_BOARDID_XMEN_III) && go->i2c_adapter_online) { union i2c_smbus_data data; /* Check to see if register 0x0A is 0x76 */ i2c_smbus_xfer(&go->i2c_adapter, 0x21, I2C_CLIENT_SCCB, I2C_SMBUS_READ, 0x0A, I2C_SMBUS_BYTE_DATA, &data); if (data.byte != 0x76) { if (assume_endura) { go->board_id = GO7007_BOARDID_ENDURA; usb->board = board = &board_endura; go->board_info = &board->main_info; strscpy(go->name, "Pelco Endura", sizeof(go->name)); } else { u16 channel; /* read channel number from GPIO[1:0] */ if (go7007_read_addr(go, 0x3c81, &channel)) goto allocfail; channel &= 0x3; go->board_id = GO7007_BOARDID_ADLINK_MPG24; usb->board = board = &board_adlink_mpg24; go->board_info = &board->main_info; go->channel_number = channel; snprintf(go->name, sizeof(go->name), "Adlink PCI-MPG24, channel #%d", channel); } go7007_update_board(go); } } num_i2c_devs = go->board_info->num_i2c_devs; /* Probe the tuner model on the TV402U */ if (go->board_id == GO7007_BOARDID_PX_TV402U) { /* Board strapping indicates tuner model */ if (go7007_usb_vendor_request(go, 0x41, 0, 0, go->usb_buf, 3, 1) < 0) { dev_err(go->dev, "GPIO read failed!\n"); goto allocfail; } switch (go->usb_buf[0] >> 6) { case 1: go->tuner_type = TUNER_SONY_BTF_PG472Z; go->std = V4L2_STD_PAL; strscpy(go->name, "Plextor PX-TV402U-EU", sizeof(go->name)); break; case 2: go->tuner_type = TUNER_SONY_BTF_PK467Z; go->std = V4L2_STD_NTSC_M_JP; num_i2c_devs -= 2; strscpy(go->name, "Plextor PX-TV402U-JP", sizeof(go->name)); break; case 3: go->tuner_type = TUNER_SONY_BTF_PB463Z; num_i2c_devs -= 2; strscpy(go->name, "Plextor PX-TV402U-NA", sizeof(go->name)); break; default: pr_debug("unable to detect tuner type!\n"); break; } /* Configure tuner mode selection inputs connected * to the EZ-USB GPIO output pins */ if (go7007_usb_vendor_request(go, 0x40, 0x7f02, 0, NULL, 0, 0) < 0) { dev_err(go->dev, "GPIO write failed!\n"); goto allocfail; } } /* Print a nasty message if the user attempts to use a USB2.0 device in * a USB1.1 port. There will be silent corruption of the stream. */ if ((board->flags & GO7007_USB_EZUSB) && usbdev->speed != USB_SPEED_HIGH) dev_err(go->dev, "*** WARNING *** This device must be connected to a USB 2.0 port! 
Attempting to capture video through a USB 1.1 port will result in stream corruption, even at low bitrates!\n"); /* Allocate the URBs and buffers for receiving the video stream */ if (board->flags & GO7007_USB_EZUSB) { if (!usb->usbdev->ep_in[6]) goto allocfail; v_urb_len = 1024; video_pipe = usb_rcvbulkpipe(usb->usbdev, 6); } else { if (!usb->usbdev->ep_in[1]) goto allocfail; v_urb_len = 512; video_pipe = usb_rcvbulkpipe(usb->usbdev, 1); } for (i = 0; i < 8; ++i) { usb->video_urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (usb->video_urbs[i] == NULL) goto allocfail; usb->video_urbs[i]->transfer_buffer = kmalloc(v_urb_len, GFP_KERNEL); if (usb->video_urbs[i]->transfer_buffer == NULL) goto allocfail; usb_fill_bulk_urb(usb->video_urbs[i], usb->usbdev, video_pipe, usb->video_urbs[i]->transfer_buffer, v_urb_len, go7007_usb_read_video_pipe_complete, go); } /* Allocate the URBs and buffers for receiving the audio stream */ if ((board->flags & GO7007_USB_EZUSB) && (board->main_info.flags & GO7007_BOARD_HAS_AUDIO)) { if (!usb->usbdev->ep_in[8]) goto allocfail; for (i = 0; i < 8; ++i) { usb->audio_urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (usb->audio_urbs[i] == NULL) goto allocfail; usb->audio_urbs[i]->transfer_buffer = kmalloc(4096, GFP_KERNEL); if (usb->audio_urbs[i]->transfer_buffer == NULL) goto allocfail; usb_fill_bulk_urb(usb->audio_urbs[i], usb->usbdev, usb_rcvbulkpipe(usb->usbdev, 8), usb->audio_urbs[i]->transfer_buffer, 4096, go7007_usb_read_audio_pipe_complete, go); } } /* Do any final GO7007 initialization, then register the * V4L2 and ALSA interfaces */ if (go7007_register_encoder(go, num_i2c_devs) < 0) goto allocfail; go->status = STATUS_ONLINE; return 0; allocfail: go7007_usb_release(go); kfree(go); return -ENOMEM; } static void go7007_usb_disconnect(struct usb_interface *intf) { struct go7007 *go = to_go7007(usb_get_intfdata(intf)); mutex_lock(&go->queue_lock); mutex_lock(&go->serialize_lock); if (go->audio_enabled) go7007_snd_remove(go); go->status = STATUS_SHUTDOWN; v4l2_device_disconnect(&go->v4l2_dev); video_unregister_device(&go->vdev); mutex_unlock(&go->serialize_lock); mutex_unlock(&go->queue_lock); v4l2_device_put(&go->v4l2_dev); } static struct usb_driver go7007_usb_driver = { .name = "go7007", .probe = go7007_usb_probe, .disconnect = go7007_usb_disconnect, .id_table = go7007_usb_id_table, }; module_usb_driver(go7007_usb_driver); MODULE_DESCRIPTION("WIS GO7007 USB support"); MODULE_LICENSE("GPL v2");
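The EZ-USB write-interrupt path above is a two-step vendor-request handshake: poll the HPI status word with request 0x14 until the busy bit (0x0010) clears, then issue request 0x12 twice to latch the parameter word and the index word. The following is a minimal userspace sketch of that same handshake using libusb-1.0; it is not part of the driver, and HPI_STATUS_ADDR, INT_PARAM_ADDR and INT_INDEX_ADDR are placeholders whose real values live in driver headers not shown in this listing.

#include <stdint.h>
#include <unistd.h>
#include <libusb-1.0/libusb.h>

#define HPI_STATUS_ADDR  0x0000   /* placeholder value */
#define INT_PARAM_ADDR   0x0000   /* placeholder value */
#define INT_INDEX_ADDR   0x0000   /* placeholder value */

static int ezusb_write_interrupt(libusb_device_handle *h, int addr, int data)
{
	unsigned char buf[2];
	uint16_t status = 0;
	int i, r;

	/* Poll the HPI status register until the busy bit (0x0010) clears. */
	for (i = 0; i < 100; i++) {
		r = libusb_control_transfer(h,
			LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_RECIPIENT_DEVICE | LIBUSB_ENDPOINT_IN,
			0x14, 0, HPI_STATUS_ADDR, buf, sizeof(buf), 500);
		if (r < 0)
			return r;
		status = buf[0] | (buf[1] << 8);  /* little-endian, as in the driver */
		if (!(status & 0x0010))
			break;
		usleep(10000);
	}
	if (i == 100)
		return -1;  /* device is hung */

	/* Write the parameter word, then the index word, via vendor request 0x12. */
	r = libusb_control_transfer(h,
		LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_RECIPIENT_DEVICE,
		0x12, data, INT_PARAM_ADDR, NULL, 0, 500);
	if (r < 0)
		return r;
	return libusb_control_transfer(h,
		LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_RECIPIENT_DEVICE,
		0x12, addr, INT_INDEX_ADDR, NULL, 0, 500);
}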
// SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * Copyright (c) 2016,2017 Facebook */ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/filter.h> #include <linux/perf_event.h> #include <uapi/linux/btf.h> #include <linux/rcupdate_trace.h> #include <linux/btf_ids.h> #include "map_in_map.h" #define ARRAY_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \ BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP) static void bpf_array_free_percpu(struct bpf_array *array) { int i; for (i = 0; i < array->map.max_entries; i++) {
free_percpu(array->pptrs[i]); cond_resched(); } } static int bpf_array_alloc_percpu(struct bpf_array *array) { void __percpu *ptr; int i; for (i = 0; i < array->map.max_entries; i++) { ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8, GFP_USER | __GFP_NOWARN); if (!ptr) { bpf_array_free_percpu(array); return -ENOMEM; } array->pptrs[i] = ptr; cond_resched(); } return 0; } /* Called from syscall */ int array_map_alloc_check(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int numa_node = bpf_map_attr_numa_node(attr); /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size == 0 || attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || !bpf_map_flags_access_ok(attr->map_flags) || (percpu && numa_node != NUMA_NO_NODE)) return -EINVAL; if (attr->map_type != BPF_MAP_TYPE_ARRAY && attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP)) return -EINVAL; if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY && attr->map_flags & BPF_F_PRESERVE_ELEMS) return -EINVAL; /* avoid overflow on round_up(map->value_size) */ if (attr->value_size > INT_MAX) return -E2BIG; /* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */ if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE) return -E2BIG; return 0; } static struct bpf_map *array_map_alloc(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int numa_node = bpf_map_attr_numa_node(attr); u32 elem_size, index_mask, max_entries; bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL); u64 array_size, mask64; struct bpf_array *array; elem_size = round_up(attr->value_size, 8); max_entries = attr->max_entries; /* On 32 bit archs roundup_pow_of_two() with max_entries that has * upper most bit set in u32 space is undefined behavior due to * resulting 1U << 32, so do it manually here in u64 space. */ mask64 = fls_long(max_entries - 1); mask64 = 1ULL << mask64; mask64 -= 1; index_mask = mask64; if (!bypass_spec_v1) { /* round up array size to nearest power of 2, * since cpu will speculate within index_mask limits */ max_entries = index_mask + 1; /* Check for overflows. 
*/ if (max_entries < attr->max_entries) return ERR_PTR(-E2BIG); } array_size = sizeof(*array); if (percpu) { array_size += (u64) max_entries * sizeof(void *); } else { /* rely on vmalloc() to return page-aligned memory and * ensure array->value is exactly page-aligned */ if (attr->map_flags & BPF_F_MMAPABLE) { array_size = PAGE_ALIGN(array_size); array_size += PAGE_ALIGN((u64) max_entries * elem_size); } else { array_size += (u64) max_entries * elem_size; } } /* allocate all map elements and zero-initialize them */ if (attr->map_flags & BPF_F_MMAPABLE) { void *data; /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */ data = bpf_map_area_mmapable_alloc(array_size, numa_node); if (!data) return ERR_PTR(-ENOMEM); array = data + PAGE_ALIGN(sizeof(struct bpf_array)) - offsetof(struct bpf_array, value); } else { array = bpf_map_area_alloc(array_size, numa_node); } if (!array) return ERR_PTR(-ENOMEM); array->index_mask = index_mask; array->map.bypass_spec_v1 = bypass_spec_v1; /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); array->elem_size = elem_size; if (percpu && bpf_array_alloc_percpu(array)) { bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } return &array->map; } static void *array_map_elem_ptr(struct bpf_array* array, u32 index) { return array->value + (u64)array->elem_size * index; } /* Called from syscall or from eBPF program */ static void *array_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; if (unlikely(index >= array->map.max_entries)) return NULL; return array->value + (u64)array->elem_size * (index & array->index_mask); } static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32 off) { struct bpf_array *array = container_of(map, struct bpf_array, map); if (map->max_entries != 1) return -ENOTSUPP; if (off >= map->value_size) return -EINVAL; *imm = (unsigned long)array->value; return 0; } static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm, u32 *off) { struct bpf_array *array = container_of(map, struct bpf_array, map); u64 base = (unsigned long)array->value; u64 range = array->elem_size; if (map->max_entries != 1) return -ENOTSUPP; if (imm < base || imm >= base + range) return -ENOENT; *off = imm - base; return 0; } /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_insn *insn = insn_buf; u32 elem_size = array->elem_size; const int ret = BPF_REG_0; const int map_ptr = BPF_REG_1; const int index = BPF_REG_2; if (map->map_flags & BPF_F_INNER_MAP) return -EOPNOTSUPP; *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); if (!map->bypass_spec_v1) { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4); *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); } else { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3); } if (is_power_of_2(elem_size)) { *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); } else { *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); } *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); *insn++ = BPF_MOV64_IMM(ret, 0); return insn - insn_buf; } /* Called from eBPF program */ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_array *array = 
container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; if (unlikely(index >= array->map.max_entries)) return NULL; return this_cpu_ptr(array->pptrs[index & array->index_mask]); } /* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */ static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_insn *insn = insn_buf; if (!bpf_jit_supports_percpu_insn()) return -EOPNOTSUPP; if (map->map_flags & BPF_F_INNER_MAP) return -EOPNOTSUPP; BUILD_BUG_ON(offsetof(struct bpf_array, map) != 0); *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct bpf_array, pptrs)); *insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0); if (!map->bypass_spec_v1) { *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6); *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask); } else { *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5); } *insn++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); *insn++ = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); *insn++ = BPF_MOV64_IMM(BPF_REG_0, 0); return insn - insn_buf; } static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; if (cpu >= nr_cpu_ids) return NULL; if (unlikely(index >= array->map.max_entries)) return NULL; return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu); } int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; void __percpu *pptr; int cpu, off = 0; u32 size; if (unlikely(index >= array->map.max_entries)) return -ENOENT; /* per_cpu areas are zero-filled and bpf programs can only * access 'value_size' of them, so copying rounded areas * will not leak any kernel data */ size = array->elem_size; rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); check_and_init_map_value(map, value + off); off += size; } rcu_read_unlock(); return 0; } /* Called from syscall */ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = key ? 
*(u32 *)key : U32_MAX; u32 *next = (u32 *)next_key; if (index >= array->map.max_entries) { *next = 0; return 0; } if (index == array->map.max_entries - 1) return -ENOENT; *next = index + 1; return 0; } /* Called from syscall or from eBPF program */ static long array_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; char *val; if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) /* unknown flags */ return -EINVAL; if (unlikely(index >= array->map.max_entries)) /* all elements were pre-allocated, cannot insert a new one */ return -E2BIG; if (unlikely(map_flags & BPF_NOEXIST)) /* all elements already exist */ return -EEXIST; if (unlikely((map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))) return -EINVAL; if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { val = this_cpu_ptr(array->pptrs[index & array->index_mask]); copy_map_value(map, val, value); bpf_obj_free_fields(array->map.record, val); } else { val = array->value + (u64)array->elem_size * (index & array->index_mask); if (map_flags & BPF_F_LOCK) copy_map_value_locked(map, val, value, false); else copy_map_value(map, val, value); bpf_obj_free_fields(array->map.record, val); } return 0; } int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; void __percpu *pptr; int cpu, off = 0; u32 size; if (unlikely(map_flags > BPF_EXIST)) /* unknown flags */ return -EINVAL; if (unlikely(index >= array->map.max_entries)) /* all elements were pre-allocated, cannot insert a new one */ return -E2BIG; if (unlikely(map_flags == BPF_NOEXIST)) /* all elements already exist */ return -EEXIST; /* the user space will provide round_up(value_size, 8) bytes that * will be copied into per-cpu area. bpf programs can only access * value_size of it. During lookup the same extra bytes will be * returned or zeros which were zero-filled by percpu_alloc, * so no kernel data leaks possible */ size = array->elem_size; rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off); bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu)); off += size; } rcu_read_unlock(); return 0; } /* Called from syscall or from eBPF program */ static long array_map_delete_elem(struct bpf_map *map, void *key) { return -EINVAL; } static void *array_map_vmalloc_addr(struct bpf_array *array) { return (void *)round_down((unsigned long)array, PAGE_SIZE); } static void array_map_free_timers_wq(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; /* We don't reset or free fields other than timer and workqueue * on uref dropping to zero. 
*/ if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) { for (i = 0; i < array->map.max_entries; i++) { if (btf_record_has_field(map->record, BPF_TIMER)) bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i)); if (btf_record_has_field(map->record, BPF_WORKQUEUE)) bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i)); } } } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void array_map_free(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; if (!IS_ERR_OR_NULL(map->record)) { if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { for (i = 0; i < array->map.max_entries; i++) { void __percpu *pptr = array->pptrs[i & array->index_mask]; int cpu; for_each_possible_cpu(cpu) { bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu)); cond_resched(); } } } else { for (i = 0; i < array->map.max_entries; i++) bpf_obj_free_fields(map->record, array_map_elem_ptr(array, i)); } } if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) bpf_array_free_percpu(array); if (array->map.map_flags & BPF_F_MMAPABLE) bpf_map_area_free(array_map_vmalloc_addr(array)); else bpf_map_area_free(array); } static void array_map_seq_show_elem(struct bpf_map *map, void *key, struct seq_file *m) { void *value; rcu_read_lock(); value = array_map_lookup_elem(map, key); if (!value) { rcu_read_unlock(); return; } if (map->btf_key_type_id) seq_printf(m, "%u: ", *(u32 *)key); btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); seq_putc(m, '\n'); rcu_read_unlock(); } static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key, struct seq_file *m) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; void __percpu *pptr; int cpu; rcu_read_lock(); seq_printf(m, "%u: {\n", *(u32 *)key); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { seq_printf(m, "\tcpu%d: ", cpu); btf_type_seq_show(map->btf, map->btf_value_type_id, per_cpu_ptr(pptr, cpu), m); seq_putc(m, '\n'); } seq_puts(m, "}\n"); rcu_read_unlock(); } static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) { u32 int_data; /* One exception for keyless BTF: .bss/.data/.rodata map */ if (btf_type_is_void(key_type)) { if (map->map_type != BPF_MAP_TYPE_ARRAY || map->max_entries != 1) return -EINVAL; if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC) return -EINVAL; return 0; } if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) return -EINVAL; int_data = *(u32 *)(key_type + 1); /* bpf array can only take a u32 key. This check makes sure * that the btf matches the attr used during map_create. */ if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data)) return -EINVAL; return 0; } static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) { struct bpf_array *array = container_of(map, struct bpf_array, map); pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT; if (!(map->map_flags & BPF_F_MMAPABLE)) return -EINVAL; if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) > PAGE_ALIGN((u64)array->map.max_entries * array->elem_size)) return -EINVAL; return remap_vmalloc_range(vma, array_map_vmalloc_addr(array), vma->vm_pgoff + pgoff); } static bool array_map_meta_equal(const struct bpf_map *meta0, const struct bpf_map *meta1) { if (!bpf_map_meta_equal(meta0, meta1)) return false; return meta0->map_flags & BPF_F_INNER_MAP ? 
true : meta0->max_entries == meta1->max_entries; } struct bpf_iter_seq_array_map_info { struct bpf_map *map; void *percpu_value_buf; u32 index; }; static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos) { struct bpf_iter_seq_array_map_info *info = seq->private; struct bpf_map *map = info->map; struct bpf_array *array; u32 index; if (info->index >= map->max_entries) return NULL; if (*pos == 0) ++*pos; array = container_of(map, struct bpf_array, map); index = info->index & array->index_mask; if (info->percpu_value_buf) return (void *)(uintptr_t)array->pptrs[index]; return array_map_elem_ptr(array, index); } static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bpf_iter_seq_array_map_info *info = seq->private; struct bpf_map *map = info->map; struct bpf_array *array; u32 index; ++*pos; ++info->index; if (info->index >= map->max_entries) return NULL; array = container_of(map, struct bpf_array, map); index = info->index & array->index_mask; if (info->percpu_value_buf) return (void *)(uintptr_t)array->pptrs[index]; return array_map_elem_ptr(array, index); } static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) { struct bpf_iter_seq_array_map_info *info = seq->private; struct bpf_iter__bpf_map_elem ctx = {}; struct bpf_map *map = info->map; struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_iter_meta meta; struct bpf_prog *prog; int off = 0, cpu = 0; void __percpu *pptr; u32 size; meta.seq = seq; prog = bpf_iter_get_info(&meta, v == NULL); if (!prog) return 0; ctx.meta = &meta; ctx.map = info->map; if (v) { ctx.key = &info->index; if (!info->percpu_value_buf) { ctx.value = v; } else { pptr = (void __percpu *)(uintptr_t)v; size = array->elem_size; for_each_possible_cpu(cpu) { copy_map_value_long(map, info->percpu_value_buf + off, per_cpu_ptr(pptr, cpu)); check_and_init_map_value(map, info->percpu_value_buf + off); off += size; } ctx.value = info->percpu_value_buf; } } return bpf_iter_run_prog(prog, &ctx); } static int bpf_array_map_seq_show(struct seq_file *seq, void *v) { return __bpf_array_map_seq_show(seq, v); } static void bpf_array_map_seq_stop(struct seq_file *seq, void *v) { if (!v) (void)__bpf_array_map_seq_show(seq, NULL); } static int bpf_iter_init_array_map(void *priv_data, struct bpf_iter_aux_info *aux) { struct bpf_iter_seq_array_map_info *seq_info = priv_data; struct bpf_map *map = aux->map; struct bpf_array *array = container_of(map, struct bpf_array, map); void *value_buf; u32 buf_size; if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { buf_size = array->elem_size * num_possible_cpus(); value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); if (!value_buf) return -ENOMEM; seq_info->percpu_value_buf = value_buf; } /* bpf_iter_attach_map() acquires a map uref, and the uref may be * released before or in the middle of iterating map elements, so * acquire an extra map uref for iterator. 
*/ bpf_map_inc_with_uref(map); seq_info->map = map; return 0; } static void bpf_iter_fini_array_map(void *priv_data) { struct bpf_iter_seq_array_map_info *seq_info = priv_data; bpf_map_put_with_uref(seq_info->map); kfree(seq_info->percpu_value_buf); } static const struct seq_operations bpf_array_map_seq_ops = { .start = bpf_array_map_seq_start, .next = bpf_array_map_seq_next, .stop = bpf_array_map_seq_stop, .show = bpf_array_map_seq_show, }; static const struct bpf_iter_seq_info iter_seq_info = { .seq_ops = &bpf_array_map_seq_ops, .init_seq_private = bpf_iter_init_array_map, .fini_seq_private = bpf_iter_fini_array_map, .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info), }; static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn, void *callback_ctx, u64 flags) { u32 i, key, num_elems = 0; struct bpf_array *array; bool is_percpu; u64 ret = 0; void *val; cant_migrate(); if (flags != 0) return -EINVAL; is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; array = container_of(map, struct bpf_array, map); for (i = 0; i < map->max_entries; i++) { if (is_percpu) val = this_cpu_ptr(array->pptrs[i]); else val = array_map_elem_ptr(array, i); num_elems++; key = i; ret = callback_fn((u64)(long)map, (u64)(long)&key, (u64)(long)val, (u64)(long)callback_ctx, 0); /* return value: 0 - continue, 1 - stop and return */ if (ret) break; } return num_elems; } static u64 array_map_mem_usage(const struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; u32 elem_size = array->elem_size; u64 entries = map->max_entries; u64 usage = sizeof(*array); if (percpu) { usage += entries * sizeof(void *); usage += entries * elem_size * num_possible_cpus(); } else { if (map->map_flags & BPF_F_MMAPABLE) { usage = PAGE_ALIGN(usage); usage += PAGE_ALIGN(entries * elem_size); } else { usage += entries * elem_size; } } return usage; } BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array) const struct bpf_map_ops array_map_ops = { .map_meta_equal = array_map_meta_equal, .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, .map_release_uref = array_map_free_timers_wq, .map_lookup_elem = array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, .map_gen_lookup = array_map_gen_lookup, .map_direct_value_addr = array_map_direct_value_addr, .map_direct_value_meta = array_map_direct_value_meta, .map_mmap = array_map_mmap, .map_seq_show_elem = array_map_seq_show_elem, .map_check_btf = array_map_check_btf, .map_lookup_batch = generic_map_lookup_batch, .map_update_batch = generic_map_update_batch, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_array_elem, .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; const struct bpf_map_ops percpu_array_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = percpu_array_map_lookup_elem, .map_gen_lookup = percpu_array_map_gen_lookup, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem, .map_seq_show_elem = percpu_array_map_seq_show_elem, .map_check_btf = 
array_map_check_btf, .map_lookup_batch = generic_map_lookup_batch, .map_update_batch = generic_map_update_batch, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_array_elem, .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; static int fd_array_map_alloc_check(union bpf_attr *attr) { /* only file descriptors can be stored in this type of map */ if (attr->value_size != sizeof(u32)) return -EINVAL; /* Program read-only/write-only not supported for special maps yet. */ if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) return -EINVAL; return array_map_alloc_check(attr); } static void fd_array_map_free(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; /* make sure it's empty */ for (i = 0; i < array->map.max_entries; i++) BUG_ON(array->ptrs[i] != NULL); bpf_map_area_free(array); } static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) { return ERR_PTR(-EOPNOTSUPP); } /* only called from syscall */ int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) { void **elem, *ptr; int ret = 0; if (!map->ops->map_fd_sys_lookup_elem) return -ENOTSUPP; rcu_read_lock(); elem = array_map_lookup_elem(map, key); if (elem && (ptr = READ_ONCE(*elem))) *value = map->ops->map_fd_sys_lookup_elem(ptr); else ret = -ENOENT; rcu_read_unlock(); return ret; } /* only called from syscall */ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); void *new_ptr, *old_ptr; u32 index = *(u32 *)key, ufd; if (map_flags != BPF_ANY) return -EINVAL; if (index >= array->map.max_entries) return -E2BIG; ufd = *(u32 *)value; new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); if (IS_ERR(new_ptr)) return PTR_ERR(new_ptr); if (map->ops->map_poke_run) { mutex_lock(&array->aux->poke_mutex); old_ptr = xchg(array->ptrs + index, new_ptr); map->ops->map_poke_run(map, index, old_ptr, new_ptr); mutex_unlock(&array->aux->poke_mutex); } else { old_ptr = xchg(array->ptrs + index, new_ptr); } if (old_ptr) map->ops->map_fd_put_ptr(map, old_ptr, true); return 0; } static long __fd_array_map_delete_elem(struct bpf_map *map, void *key, bool need_defer) { struct bpf_array *array = container_of(map, struct bpf_array, map); void *old_ptr; u32 index = *(u32 *)key; if (index >= array->map.max_entries) return -E2BIG; if (map->ops->map_poke_run) { mutex_lock(&array->aux->poke_mutex); old_ptr = xchg(array->ptrs + index, NULL); map->ops->map_poke_run(map, index, old_ptr, NULL); mutex_unlock(&array->aux->poke_mutex); } else { old_ptr = xchg(array->ptrs + index, NULL); } if (old_ptr) { map->ops->map_fd_put_ptr(map, old_ptr, need_defer); return 0; } else { return -ENOENT; } } static long fd_array_map_delete_elem(struct bpf_map *map, void *key) { return __fd_array_map_delete_elem(map, key, true); } static void *prog_fd_array_get_ptr(struct bpf_map *map, struct file *map_file, int fd) { struct bpf_prog *prog = bpf_prog_get(fd); bool is_extended; if (IS_ERR(prog)) return prog; if (prog->type == BPF_PROG_TYPE_EXT || !bpf_prog_map_compatible(map, prog)) { bpf_prog_put(prog); return ERR_PTR(-EINVAL); } mutex_lock(&prog->aux->ext_mutex); is_extended = prog->aux->is_extended; if (!is_extended) prog->aux->prog_array_member_cnt++; mutex_unlock(&prog->aux->ext_mutex); if (is_extended) { /* Extended prog can not be tail callee. 
It's to prevent a * potential infinite loop like: * tail callee prog entry -> tail callee prog subprog -> * freplace prog entry --tailcall-> tail callee prog entry. */ bpf_prog_put(prog); return ERR_PTR(-EBUSY); } return prog; } static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) { struct bpf_prog *prog = ptr; mutex_lock(&prog->aux->ext_mutex); prog->aux->prog_array_member_cnt--; mutex_unlock(&prog->aux->ext_mutex); /* bpf_prog is freed after one RCU or tasks trace grace period */ bpf_prog_put(prog); } static u32 prog_fd_array_sys_lookup_elem(void *ptr) { return ((struct bpf_prog *)ptr)->aux->id; } /* decrement refcnt of all bpf_progs that are stored in this map */ static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; for (i = 0; i < array->map.max_entries; i++) __fd_array_map_delete_elem(map, &i, need_defer); } static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key, struct seq_file *m) { void **elem, *ptr; u32 prog_id; rcu_read_lock(); elem = array_map_lookup_elem(map, key); if (elem) { ptr = READ_ONCE(*elem); if (ptr) { seq_printf(m, "%u: ", *(u32 *)key); prog_id = prog_fd_array_sys_lookup_elem(ptr); btf_type_seq_show(map->btf, map->btf_value_type_id, &prog_id, m); seq_putc(m, '\n'); } } rcu_read_unlock(); } struct prog_poke_elem { struct list_head list; struct bpf_prog_aux *aux; }; static int prog_array_map_poke_track(struct bpf_map *map, struct bpf_prog_aux *prog_aux) { struct prog_poke_elem *elem; struct bpf_array_aux *aux; int ret = 0; aux = container_of(map, struct bpf_array, map)->aux; mutex_lock(&aux->poke_mutex); list_for_each_entry(elem, &aux->poke_progs, list) { if (elem->aux == prog_aux) goto out; } elem = kmalloc(sizeof(*elem), GFP_KERNEL); if (!elem) { ret = -ENOMEM; goto out; } INIT_LIST_HEAD(&elem->list); /* We must track the program's aux info at this point in time * since the program pointer itself may not be stable yet, see * also comment in prog_array_map_poke_run(). */ elem->aux = prog_aux; list_add_tail(&elem->list, &aux->poke_progs); out: mutex_unlock(&aux->poke_mutex); return ret; } static void prog_array_map_poke_untrack(struct bpf_map *map, struct bpf_prog_aux *prog_aux) { struct prog_poke_elem *elem, *tmp; struct bpf_array_aux *aux; aux = container_of(map, struct bpf_array, map)->aux; mutex_lock(&aux->poke_mutex); list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) { if (elem->aux == prog_aux) { list_del_init(&elem->list); kfree(elem); break; } } mutex_unlock(&aux->poke_mutex); } void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, struct bpf_prog *new, struct bpf_prog *old) { WARN_ON_ONCE(1); } static void prog_array_map_poke_run(struct bpf_map *map, u32 key, struct bpf_prog *old, struct bpf_prog *new) { struct prog_poke_elem *elem; struct bpf_array_aux *aux; aux = container_of(map, struct bpf_array, map)->aux; WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex)); list_for_each_entry(elem, &aux->poke_progs, list) { struct bpf_jit_poke_descriptor *poke; int i; for (i = 0; i < elem->aux->size_poke_tab; i++) { poke = &elem->aux->poke_tab[i]; /* Few things to be aware of: * * 1) We can only ever access aux in this context, but * not aux->prog since it might not be stable yet and * there could be danger of use after free otherwise. * 2) Initially when we start tracking aux, the program * is not JITed yet and also does not have a kallsyms * entry. 
We skip these as poke->tailcall_target_stable * is not active yet. The JIT will do the final fixup * before setting it stable. The various * poke->tailcall_target_stable are successively * activated, so tail call updates can arrive from here * while JIT is still finishing its final fixup for * non-activated poke entries. * 3) Also programs reaching refcount of zero while patching * is in progress is okay since we're protected under * poke_mutex and untrack the programs before the JIT * buffer is freed. */ if (!READ_ONCE(poke->tailcall_target_stable)) continue; if (poke->reason != BPF_POKE_REASON_TAIL_CALL) continue; if (poke->tail_call.map != map || poke->tail_call.key != key) continue; bpf_arch_poke_desc_update(poke, new, old); } } } static void prog_array_map_clear_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_array_aux, work)->map; bpf_fd_array_map_clear(map, true); bpf_map_put(map); } static void prog_array_map_clear(struct bpf_map *map) { struct bpf_array_aux *aux = container_of(map, struct bpf_array, map)->aux; bpf_map_inc(map); schedule_work(&aux->work); } static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) { struct bpf_array_aux *aux; struct bpf_map *map; aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT); if (!aux) return ERR_PTR(-ENOMEM); INIT_WORK(&aux->work, prog_array_map_clear_deferred); INIT_LIST_HEAD(&aux->poke_progs); mutex_init(&aux->poke_mutex); map = array_map_alloc(attr); if (IS_ERR(map)) { kfree(aux); return map; } container_of(map, struct bpf_array, map)->aux = aux; aux->map = map; return map; } static void prog_array_map_free(struct bpf_map *map) { struct prog_poke_elem *elem, *tmp; struct bpf_array_aux *aux; aux = container_of(map, struct bpf_array, map)->aux; list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) { list_del_init(&elem->list); kfree(elem); } kfree(aux); fd_array_map_free(map); } /* prog_array->aux->{type,jited} is a runtime binding. * Doing static check alone in the verifier is not enough. * Thus, prog_array_map cannot be used as an inner_map * and map_meta_equal is not implemented. 
*/ const struct bpf_map_ops prog_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = prog_array_map_alloc, .map_free = prog_array_map_free, .map_poke_track = prog_array_map_poke_track, .map_poke_untrack = prog_array_map_poke_untrack, .map_poke_run = prog_array_map_poke_run, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = prog_fd_array_get_ptr, .map_fd_put_ptr = prog_fd_array_put_ptr, .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, .map_release_uref = prog_array_map_clear, .map_seq_show_elem = prog_array_map_seq_show_elem, .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, struct file *map_file) { struct bpf_event_entry *ee; ee = kzalloc(sizeof(*ee), GFP_KERNEL); if (ee) { ee->event = perf_file->private_data; ee->perf_file = perf_file; ee->map_file = map_file; } return ee; } static void __bpf_event_entry_free(struct rcu_head *rcu) { struct bpf_event_entry *ee; ee = container_of(rcu, struct bpf_event_entry, rcu); fput(ee->perf_file); kfree(ee); } static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee) { call_rcu(&ee->rcu, __bpf_event_entry_free); } static void *perf_event_fd_array_get_ptr(struct bpf_map *map, struct file *map_file, int fd) { struct bpf_event_entry *ee; struct perf_event *event; struct file *perf_file; u64 value; perf_file = perf_event_get(fd); if (IS_ERR(perf_file)) return perf_file; ee = ERR_PTR(-EOPNOTSUPP); event = perf_file->private_data; if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP) goto err_out; ee = bpf_event_entry_gen(perf_file, map_file); if (ee) return ee; ee = ERR_PTR(-ENOMEM); err_out: fput(perf_file); return ee; } static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) { /* bpf_perf_event is freed after one RCU grace period */ bpf_event_entry_free_rcu(ptr); } static void perf_event_fd_array_release(struct bpf_map *map, struct file *map_file) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_event_entry *ee; int i; if (map->map_flags & BPF_F_PRESERVE_ELEMS) return; rcu_read_lock(); for (i = 0; i < array->map.max_entries; i++) { ee = READ_ONCE(array->ptrs[i]); if (ee && ee->map_file == map_file) __fd_array_map_delete_elem(map, &i, true); } rcu_read_unlock(); } static void perf_event_fd_array_map_free(struct bpf_map *map) { if (map->map_flags & BPF_F_PRESERVE_ELEMS) bpf_fd_array_map_clear(map, false); fd_array_map_free(map); } const struct bpf_map_ops perf_event_array_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = perf_event_fd_array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = perf_event_fd_array_get_ptr, .map_fd_put_ptr = perf_event_fd_array_put_ptr, .map_release = perf_event_fd_array_release, .map_check_btf = map_check_no_btf, .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; #ifdef CONFIG_CGROUPS static void *cgroup_fd_array_get_ptr(struct bpf_map *map, struct file *map_file /* not used */, int fd) { return cgroup_get_from_fd(fd); } static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) { /* cgroup_put free cgrp after a rcu grace period */ cgroup_put(ptr); } static void 
cgroup_fd_array_free(struct bpf_map *map) { bpf_fd_array_map_clear(map, false); fd_array_map_free(map); } const struct bpf_map_ops cgroup_array_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = cgroup_fd_array_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = cgroup_fd_array_get_ptr, .map_fd_put_ptr = cgroup_fd_array_put_ptr, .map_check_btf = map_check_no_btf, .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; #endif static struct bpf_map *array_of_map_alloc(union bpf_attr *attr) { struct bpf_map *map, *inner_map_meta; inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); if (IS_ERR(inner_map_meta)) return inner_map_meta; map = array_map_alloc(attr); if (IS_ERR(map)) { bpf_map_meta_free(inner_map_meta); return map; } map->inner_map_meta = inner_map_meta; return map; } static void array_of_map_free(struct bpf_map *map) { /* map->inner_map_meta is only accessed by syscall which * is protected by fdget/fdput. */ bpf_map_meta_free(map->inner_map_meta); bpf_fd_array_map_clear(map, false); fd_array_map_free(map); } static void *array_of_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_map **inner_map = array_map_lookup_elem(map, key); if (!inner_map) return NULL; return READ_ONCE(*inner_map); } static int array_of_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 elem_size = array->elem_size; struct bpf_insn *insn = insn_buf; const int ret = BPF_REG_0; const int map_ptr = BPF_REG_1; const int index = BPF_REG_2; *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); if (!map->bypass_spec_v1) { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6); *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); } else { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); } if (is_power_of_2(elem_size)) *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); else *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); *insn++ = BPF_MOV64_IMM(ret, 0); return insn - insn_buf; } const struct bpf_map_ops array_of_maps_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_of_map_alloc, .map_free = array_of_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = array_of_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = bpf_map_fd_get_ptr, .map_fd_put_ptr = bpf_map_fd_put_ptr, .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = array_of_map_gen_lookup, .map_lookup_batch = generic_map_lookup_batch, .map_update_batch = generic_map_update_batch, .map_check_btf = map_check_no_btf, .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], };
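The array map above pre-allocates and zero-fills every slot at creation time, which is why array_map_update_elem() rejects BPF_NOEXIST with -EEXIST and array_map_delete_elem() always returns -EINVAL. The following is a hedged userspace sketch that exercises those semantics through libbpf's syscall wrappers; it assumes a libbpf recent enough to provide bpf_map_create(), and the map name "demo_array" and the sizes are illustrative only.

#include <stdio.h>
#include <errno.h>
#include <bpf/bpf.h>

int main(void)
{
	__u32 key = 3;
	long value = 42, out = 0;
	int fd;

	/* key_size must be 4; all 16 elements are pre-allocated and zeroed */
	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "demo_array",
			    sizeof(__u32), sizeof(long), 16, NULL);
	if (fd < 0)
		return 1;

	/* BPF_ANY overwrites the existing (zero-filled) slot */
	if (bpf_map_update_elem(fd, &key, &value, BPF_ANY))
		return 1;

	/* BPF_NOEXIST can never succeed: every slot already exists (-EEXIST) */
	if (!bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) || errno != EEXIST)
		fprintf(stderr, "unexpected BPF_NOEXIST result\n");

	/* deletion is unsupported on array maps: expect -EINVAL */
	if (!bpf_map_delete_elem(fd, &key) || errno != EINVAL)
		fprintf(stderr, "unexpected delete result\n");

	bpf_map_lookup_elem(fd, &key, &out);
	printf("slot %u = %ld\n", key, out);
	return 0;
}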
/* SPDX-License-Identifier: GPL-2.0 */ #include <linux/init.h> #include <linux/module.h> #include <linux/netfilter.h> #include <net/flow_offload.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_offload.h> #include <net/pkt_cls.h> static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions) { struct nft_flow_rule *flow; flow = kzalloc(sizeof(struct nft_flow_rule), GFP_KERNEL); if (!flow) return NULL; flow->rule = flow_rule_alloc(num_actions); if (!flow->rule) { kfree(flow); return NULL; } flow->rule->match.dissector = &flow->match.dissector; flow->rule->match.mask = &flow->match.mask; flow->rule->match.key = &flow->match.key; return flow; } void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, enum
flow_dissector_key_id addr_type) { struct nft_flow_match *match = &flow->match; struct nft_flow_key *mask = &match->mask; struct nft_flow_key *key = &match->key; if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL)) return; key->control.addr_type = addr_type; mask->control.addr_type = 0xffff; match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL); match->dissector.offset[FLOW_DISSECTOR_KEY_CONTROL] = offsetof(struct nft_flow_key, control); } struct nft_offload_ethertype { __be16 value; __be16 mask; }; static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow) { struct nft_flow_match *match = &flow->match; struct nft_offload_ethertype ethertype = { .value = match->key.basic.n_proto, .mask = match->mask.basic.n_proto, }; if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) && (match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) || match->key.vlan.vlan_tpid == htons(ETH_P_8021AD))) { match->key.basic.n_proto = match->key.cvlan.vlan_tpid; match->mask.basic.n_proto = match->mask.cvlan.vlan_tpid; match->key.cvlan.vlan_tpid = match->key.vlan.vlan_tpid; match->mask.cvlan.vlan_tpid = match->mask.vlan.vlan_tpid; match->key.vlan.vlan_tpid = ethertype.value; match->mask.vlan.vlan_tpid = ethertype.mask; match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] = offsetof(struct nft_flow_key, cvlan); match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN); } else if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) && (match->key.basic.n_proto == htons(ETH_P_8021Q) || match->key.basic.n_proto == htons(ETH_P_8021AD))) { match->key.basic.n_proto = match->key.vlan.vlan_tpid; match->mask.basic.n_proto = match->mask.vlan.vlan_tpid; match->key.vlan.vlan_tpid = ethertype.value; match->mask.vlan.vlan_tpid = ethertype.mask; match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] = offsetof(struct nft_flow_key, vlan); match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_VLAN); } } struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule) { struct nft_offload_ctx *ctx; struct nft_flow_rule *flow; int num_actions = 0, err; struct nft_expr *expr; expr = nft_expr_first(rule); while (nft_expr_more(rule, expr)) { if (expr->ops->offload_action && expr->ops->offload_action(expr)) num_actions++; expr = nft_expr_next(expr); } if (num_actions == 0) return ERR_PTR(-EOPNOTSUPP); flow = nft_flow_rule_alloc(num_actions); if (!flow) return ERR_PTR(-ENOMEM); expr = nft_expr_first(rule); ctx = kzalloc(sizeof(struct nft_offload_ctx), GFP_KERNEL); if (!ctx) { err = -ENOMEM; goto err_out; } ctx->net = net; ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC; while (nft_expr_more(rule, expr)) { if (!expr->ops->offload) { err = -EOPNOTSUPP; goto err_out; } err = expr->ops->offload(ctx, flow, expr); if (err < 0) goto err_out; expr = nft_expr_next(expr); } nft_flow_rule_transfer_vlan(ctx, flow); flow->proto = ctx->dep.l3num; kfree(ctx); return flow; err_out: kfree(ctx); nft_flow_rule_destroy(flow); return ERR_PTR(err); } void nft_flow_rule_destroy(struct nft_flow_rule *flow) { struct flow_action_entry *entry; int i; flow_action_for_each(i, entry, &flow->rule->action) { switch (entry->id) { case FLOW_ACTION_REDIRECT: case FLOW_ACTION_MIRRED: dev_put(entry->dev); break; default: break; } } kfree(flow->rule); kfree(flow); } void nft_offload_set_dependency(struct nft_offload_ctx *ctx, enum nft_offload_dep_type type) { ctx->dep.type = type; } void nft_offload_update_dependency(struct nft_offload_ctx *ctx, const void *data, u32 len) { switch 
(ctx->dep.type) { case NFT_OFFLOAD_DEP_NETWORK: WARN_ON(len != sizeof(__u16)); memcpy(&ctx->dep.l3num, data, sizeof(__u16)); break; case NFT_OFFLOAD_DEP_TRANSPORT: WARN_ON(len != sizeof(__u8)); memcpy(&ctx->dep.protonum, data, sizeof(__u8)); break; default: break; } ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC; } static void nft_flow_offload_common_init(struct flow_cls_common_offload *common, __be16 proto, int priority, struct netlink_ext_ack *extack) { common->protocol = proto; common->prio = priority; common->extack = extack; } static int nft_setup_cb_call(enum tc_setup_type type, void *type_data, struct list_head *cb_list) { struct flow_block_cb *block_cb; int err; list_for_each_entry(block_cb, cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err < 0) return err; } return 0; } static int nft_chain_offload_priority(const struct nft_base_chain *basechain) { if (basechain->ops.priority <= 0 || basechain->ops.priority > USHRT_MAX) return -1; return 0; } bool nft_chain_offload_support(const struct nft_base_chain *basechain) { struct nf_hook_ops *ops; struct net_device *dev; struct nft_hook *hook; if (nft_chain_offload_priority(basechain) < 0) return false; list_for_each_entry(hook, &basechain->hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) { if (ops->pf != NFPROTO_NETDEV || ops->hooknum != NF_NETDEV_INGRESS) return false; dev = ops->dev; if (!dev->netdev_ops->ndo_setup_tc && !flow_indr_dev_exists()) return false; } } return true; } static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow, const struct nft_base_chain *basechain, const struct nft_rule *rule, const struct nft_flow_rule *flow, struct netlink_ext_ack *extack, enum flow_cls_command command) { __be16 proto = ETH_P_ALL; memset(cls_flow, 0, sizeof(*cls_flow)); if (flow) proto = flow->proto; nft_flow_offload_common_init(&cls_flow->common, proto, basechain->ops.priority, extack); cls_flow->command = command; cls_flow->cookie = (unsigned long) rule; if (flow) cls_flow->rule = flow->rule; } static int nft_flow_offload_cmd(const struct nft_chain *chain, const struct nft_rule *rule, struct nft_flow_rule *flow, enum flow_cls_command command, struct flow_cls_offload *cls_flow) { struct netlink_ext_ack extack = {}; struct nft_base_chain *basechain; if (!nft_is_base_chain(chain)) return -EOPNOTSUPP; basechain = nft_base_chain(chain); nft_flow_cls_offload_setup(cls_flow, basechain, rule, flow, &extack, command); return nft_setup_cb_call(TC_SETUP_CLSFLOWER, cls_flow, &basechain->flow_block.cb_list); } static int nft_flow_offload_rule(const struct nft_chain *chain, struct nft_rule *rule, struct nft_flow_rule *flow, enum flow_cls_command command) { struct flow_cls_offload cls_flow; return nft_flow_offload_cmd(chain, rule, flow, command, &cls_flow); } int nft_flow_rule_stats(const struct nft_chain *chain, const struct nft_rule *rule) { struct flow_cls_offload cls_flow = {}; struct nft_expr *expr, *next; int err; err = nft_flow_offload_cmd(chain, rule, NULL, FLOW_CLS_STATS, &cls_flow); if (err < 0) return err; nft_rule_for_each_expr(expr, next, rule) { if (expr->ops->offload_stats) expr->ops->offload_stats(expr, &cls_flow.stats); } return 0; } static int nft_flow_offload_bind(struct flow_block_offload *bo, struct nft_base_chain *basechain) { list_splice(&bo->cb_list, &basechain->flow_block.cb_list); return 0; } static int nft_flow_offload_unbind(struct flow_block_offload *bo, struct nft_base_chain *basechain) { struct flow_block_cb *block_cb, *next; struct flow_cls_offload cls_flow; struct 
netlink_ext_ack extack; struct nft_chain *chain; struct nft_rule *rule; chain = &basechain->chain; list_for_each_entry(rule, &chain->rules, list) { memset(&extack, 0, sizeof(extack)); nft_flow_cls_offload_setup(&cls_flow, basechain, rule, NULL, &extack, FLOW_CLS_DESTROY); nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow, &bo->cb_list); } list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { list_del(&block_cb->list); flow_block_cb_free(block_cb); } return 0; } static int nft_block_setup(struct nft_base_chain *basechain, struct flow_block_offload *bo, enum flow_block_command cmd) { int err; switch (cmd) { case FLOW_BLOCK_BIND: err = nft_flow_offload_bind(bo, basechain); break; case FLOW_BLOCK_UNBIND: err = nft_flow_offload_unbind(bo, basechain); break; default: WARN_ON_ONCE(1); err = -EOPNOTSUPP; } return err; } static void nft_flow_block_offload_init(struct flow_block_offload *bo, struct net *net, enum flow_block_command cmd, struct nft_base_chain *basechain, struct netlink_ext_ack *extack) { memset(bo, 0, sizeof(*bo)); bo->net = net; bo->block = &basechain->flow_block; bo->command = cmd; bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; bo->extack = extack; bo->cb_list_head = &basechain->flow_block.cb_list; INIT_LIST_HEAD(&bo->cb_list); } static int nft_block_offload_cmd(struct nft_base_chain *chain, struct net_device *dev, enum flow_block_command cmd) { struct netlink_ext_ack extack = {}; struct flow_block_offload bo; int err; nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack); err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); if (err < 0) return err; return nft_block_setup(chain, &bo, cmd); } static void nft_indr_block_cleanup(struct flow_block_cb *block_cb) { struct nft_base_chain *basechain = block_cb->indr.data; struct net_device *dev = block_cb->indr.dev; struct netlink_ext_ack extack = {}; struct nftables_pernet *nft_net; struct net *net = dev_net(dev); struct flow_block_offload bo; nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND, basechain, &extack); nft_net = nft_pernet(net); mutex_lock(&nft_net->commit_mutex); list_del(&block_cb->driver_list); list_move(&block_cb->list, &bo.cb_list); nft_flow_offload_unbind(&bo, basechain); mutex_unlock(&nft_net->commit_mutex); } static int nft_indr_block_offload_cmd(struct nft_base_chain *basechain, struct net_device *dev, enum flow_block_command cmd) { struct netlink_ext_ack extack = {}; struct flow_block_offload bo; int err; nft_flow_block_offload_init(&bo, dev_net(dev), cmd, basechain, &extack); err = flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_BLOCK, basechain, &bo, nft_indr_block_cleanup); if (err < 0) return err; if (list_empty(&bo.cb_list)) return -EOPNOTSUPP; return nft_block_setup(basechain, &bo, cmd); } static int nft_chain_offload_cmd(struct nft_base_chain *basechain, struct net_device *dev, enum flow_block_command cmd) { int err; if (dev->netdev_ops->ndo_setup_tc) err = nft_block_offload_cmd(basechain, dev, cmd); else err = nft_indr_block_offload_cmd(basechain, dev, cmd); return err; } static int nft_flow_block_chain(struct nft_base_chain *basechain, const struct net_device *this_dev, enum flow_block_command cmd) { struct nf_hook_ops *ops; struct nft_hook *hook; int err, i = 0; list_for_each_entry(hook, &basechain->hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) { if (this_dev && this_dev != ops->dev) continue; err = nft_chain_offload_cmd(basechain, ops->dev, cmd); if (err < 0 && cmd == FLOW_BLOCK_BIND) { if (!this_dev) goto err_flow_block; return 
err; } i++; } } return 0; err_flow_block: list_for_each_entry(hook, &basechain->hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) { if (i-- <= 0) break; nft_chain_offload_cmd(basechain, ops->dev, FLOW_BLOCK_UNBIND); } } return err; } static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy, enum flow_block_command cmd) { struct nft_base_chain *basechain; u8 policy; if (!nft_is_base_chain(chain)) return -EOPNOTSUPP; basechain = nft_base_chain(chain); policy = ppolicy ? *ppolicy : basechain->policy; /* Only default policy to accept is supported for now. */ if (cmd == FLOW_BLOCK_BIND && policy == NF_DROP) return -EOPNOTSUPP; return nft_flow_block_chain(basechain, NULL, cmd); } static void nft_flow_rule_offload_abort(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); int err = 0; list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) { if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) || nft_trans_chain_update(trans)) continue; err = nft_flow_offload_chain(nft_trans_chain(trans), NULL, FLOW_BLOCK_UNBIND); break; case NFT_MSG_DELCHAIN: if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; err = nft_flow_offload_chain(nft_trans_chain(trans), NULL, FLOW_BLOCK_BIND); break; case NFT_MSG_NEWRULE: if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; case NFT_MSG_DELRULE: if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); break; } if (WARN_ON_ONCE(err)) break; } } int nft_flow_rule_offload_commit(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans; int err = 0; u8 policy; list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) || nft_trans_chain_update(trans)) continue; policy = nft_trans_chain_policy(trans); err = nft_flow_offload_chain(nft_trans_chain(trans), &policy, FLOW_BLOCK_BIND); break; case NFT_MSG_DELCHAIN: if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; policy = nft_trans_chain_policy(trans); err = nft_flow_offload_chain(nft_trans_chain(trans), &policy, FLOW_BLOCK_UNBIND); break; case NFT_MSG_NEWRULE: if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; if (trans->flags & NLM_F_REPLACE || !(trans->flags & NLM_F_APPEND)) { err = -EOPNOTSUPP; break; } err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); break; case NFT_MSG_DELRULE: if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; } if (err) { nft_flow_rule_offload_abort(net, trans); break; } } return err; } static struct nft_chain *__nft_offload_get_chain(const struct nftables_pernet *nft_net, struct net_device *dev) { struct nft_base_chain *basechain; struct nft_hook *hook, *found; const struct nft_table *table; struct nft_chain *chain; list_for_each_entry(table, &nft_net->tables, 
list) { if (table->family != NFPROTO_NETDEV) continue; list_for_each_entry(chain, &table->chains, list) { if (!nft_is_base_chain(chain) || !(chain->flags & NFT_CHAIN_HW_OFFLOAD)) continue; found = NULL; basechain = nft_base_chain(chain); list_for_each_entry(hook, &basechain->hook_list, list) { if (!nft_hook_find_ops(hook, dev)) continue; found = hook; break; } if (!found) continue; return chain; } } return NULL; } static int nft_offload_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nftables_pernet *nft_net; struct net *net = dev_net(dev); struct nft_chain *chain; if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; nft_net = nft_pernet(net); mutex_lock(&nft_net->commit_mutex); chain = __nft_offload_get_chain(nft_net, dev); if (chain) nft_flow_block_chain(nft_base_chain(chain), dev, FLOW_BLOCK_UNBIND); mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } static struct notifier_block nft_offload_netdev_notifier = { .notifier_call = nft_offload_netdev_event, }; int nft_offload_init(void) { return register_netdevice_notifier(&nft_offload_netdev_notifier); } void nft_offload_exit(void) { unregister_netdevice_notifier(&nft_offload_netdev_notifier); }
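/* nft_flow_rule_create() above walks a rule twice: it first counts expressions
 * whose ->offload_action() reports a hardware action, then calls each
 * expression's ->offload() to fill a flow_rule, and nft_setup_cb_call() later
 * hands that flow_rule to the flow block callbacks registered by drivers. A
 * hypothetical driver-side callback of the kind invoked with TC_SETUP_CLSFLOWER
 * is sketched below; the function name is an assumption and the hardware
 * programming itself is only described in comments. */
#include <net/pkt_cls.h>
#include <net/flow_offload.h>

static int example_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
				     void *cb_priv)
{
	struct flow_cls_offload *f = type_data;

	if (type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	switch (f->command) {
	case FLOW_CLS_REPLACE:
		/* Program f->rule (matches + actions built by
		 * nft_flow_rule_create()) into hardware, keyed by f->cookie,
		 * which nft_flow_cls_offload_setup() set to the nft rule. */
		return 0;
	case FLOW_CLS_DESTROY:
		/* Remove the hardware entry keyed by f->cookie. */
		return 0;
	case FLOW_CLS_STATS:
		/* Fill f->stats so expr->ops->offload_stats() can report it
		 * through nft_flow_rule_stats() above. */
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}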
87 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Trace point definitions for the RDMA Connect Manager. * * Author: Chuck Lever <chuck.lever@oracle.com> * * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */ #undef TRACE_SYSTEM #define TRACE_SYSTEM rdma_cma #if !defined(_TRACE_RDMA_CMA_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_RDMA_CMA_H #include <linux/tracepoint.h> #include <trace/misc/rdma.h> DECLARE_EVENT_CLASS(cma_fsm_class, TP_PROTO( const struct rdma_id_private *id_priv ), TP_ARGS(id_priv), TP_STRUCT__entry( __field(u32, cm_id) __field(u32, tos) __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( __entry->cm_id = id_priv->res.id; __entry->tos = id_priv->tos; memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, sizeof(struct sockaddr_in6)); memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, sizeof(struct sockaddr_in6)); ), TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u", __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos ) ); #define DEFINE_CMA_FSM_EVENT(name) \ DEFINE_EVENT(cma_fsm_class, cm_##name, \ TP_PROTO( \ const struct rdma_id_private *id_priv \ ), \ TP_ARGS(id_priv)) DEFINE_CMA_FSM_EVENT(send_rtu); DEFINE_CMA_FSM_EVENT(send_rej); DEFINE_CMA_FSM_EVENT(prepare_mra); DEFINE_CMA_FSM_EVENT(send_sidr_req); DEFINE_CMA_FSM_EVENT(send_sidr_rep); DEFINE_CMA_FSM_EVENT(disconnect); DEFINE_CMA_FSM_EVENT(sent_drep); DEFINE_CMA_FSM_EVENT(sent_dreq); DEFINE_CMA_FSM_EVENT(id_destroy); TRACE_EVENT(cm_id_attach, TP_PROTO( const struct rdma_id_private *id_priv, const struct ib_device *device ), TP_ARGS(id_priv, device), TP_STRUCT__entry( __field(u32, cm_id) __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) __string(devname, device->name) ), TP_fast_assign( __entry->cm_id = id_priv->res.id; memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, sizeof(struct sockaddr_in6)); memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, sizeof(struct sockaddr_in6)); __assign_str(devname); ), TP_printk("cm.id=%u src=%pISpc dst=%pISpc device=%s", __entry->cm_id, 
__entry->srcaddr, __entry->dstaddr, __get_str(devname) ) ); DECLARE_EVENT_CLASS(cma_qp_class, TP_PROTO( const struct rdma_id_private *id_priv ), TP_ARGS(id_priv), TP_STRUCT__entry( __field(u32, cm_id) __field(u32, tos) __field(u32, qp_num) __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( __entry->cm_id = id_priv->res.id; __entry->tos = id_priv->tos; __entry->qp_num = id_priv->qp_num; memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, sizeof(struct sockaddr_in6)); memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, sizeof(struct sockaddr_in6)); ), TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u qp_num=%u", __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos, __entry->qp_num ) ); #define DEFINE_CMA_QP_EVENT(name) \ DEFINE_EVENT(cma_qp_class, cm_##name, \ TP_PROTO( \ const struct rdma_id_private *id_priv \ ), \ TP_ARGS(id_priv)) DEFINE_CMA_QP_EVENT(send_req); DEFINE_CMA_QP_EVENT(send_rep); DEFINE_CMA_QP_EVENT(qp_destroy); /* * enum ib_wp_type, from include/rdma/ib_verbs.h */ #define IB_QP_TYPE_LIST \ ib_qp_type(SMI) \ ib_qp_type(GSI) \ ib_qp_type(RC) \ ib_qp_type(UC) \ ib_qp_type(UD) \ ib_qp_type(RAW_IPV6) \ ib_qp_type(RAW_ETHERTYPE) \ ib_qp_type(RAW_PACKET) \ ib_qp_type(XRC_INI) \ ib_qp_type_end(XRC_TGT) #undef ib_qp_type #undef ib_qp_type_end #define ib_qp_type(x) TRACE_DEFINE_ENUM(IB_QPT_##x); #define ib_qp_type_end(x) TRACE_DEFINE_ENUM(IB_QPT_##x); IB_QP_TYPE_LIST #undef ib_qp_type #undef ib_qp_type_end #define ib_qp_type(x) { IB_QPT_##x, #x }, #define ib_qp_type_end(x) { IB_QPT_##x, #x } #define rdma_show_qp_type(x) \ __print_symbolic(x, IB_QP_TYPE_LIST) TRACE_EVENT(cm_qp_create, TP_PROTO( const struct rdma_id_private *id_priv, const struct ib_pd *pd, const struct ib_qp_init_attr *qp_init_attr, int rc ), TP_ARGS(id_priv, pd, qp_init_attr, rc), TP_STRUCT__entry( __field(u32, cm_id) __field(u32, pd_id) __field(u32, tos) __field(u32, qp_num) __field(u32, send_wr) __field(u32, recv_wr) __field(int, rc) __field(unsigned long, qp_type) __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( __entry->cm_id = id_priv->res.id; __entry->pd_id = pd->res.id; __entry->tos = id_priv->tos; __entry->send_wr = qp_init_attr->cap.max_send_wr; __entry->recv_wr = qp_init_attr->cap.max_recv_wr; __entry->rc = rc; if (!rc) { __entry->qp_num = id_priv->qp_num; __entry->qp_type = id_priv->id.qp_type; } else { __entry->qp_num = 0; __entry->qp_type = 0; } memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, sizeof(struct sockaddr_in6)); memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, sizeof(struct sockaddr_in6)); ), TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u pd.id=%u qp_type=%s" " send_wr=%u recv_wr=%u qp_num=%u rc=%d", __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos, __entry->pd_id, rdma_show_qp_type(__entry->qp_type), __entry->send_wr, __entry->recv_wr, __entry->qp_num, __entry->rc ) ); TRACE_EVENT(cm_req_handler, TP_PROTO( const struct rdma_id_private *id_priv, int event ), TP_ARGS(id_priv, event), TP_STRUCT__entry( __field(u32, cm_id) __field(u32, tos) __field(unsigned long, event) __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( __entry->cm_id = id_priv->res.id; __entry->tos = id_priv->tos; __entry->event = event; memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, 
sizeof(struct sockaddr_in6)); memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, sizeof(struct sockaddr_in6)); ), TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu)", __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos, rdma_show_ib_cm_event(__entry->event), __entry->event ) ); TRACE_EVENT(cm_event_handler, TP_PROTO( const struct rdma_id_private *id_priv, const struct rdma_cm_event *event ), TP_ARGS(id_priv, event), TP_STRUCT__entry( __field(u32, cm_id) __field(u32, tos) __field(unsigned long, event) __field(int, status) __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( __entry->cm_id = id_priv->res.id; __entry->tos = id_priv->tos; __entry->event = event->event; __entry->status = event->status; memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, sizeof(struct sockaddr_in6)); memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, sizeof(struct sockaddr_in6)); ), TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu/%d)", __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos, rdma_show_cm_event(__entry->event), __entry->event, __entry->status ) ); TRACE_EVENT(cm_event_done, TP_PROTO( const struct rdma_id_private *id_priv, const struct rdma_cm_event *event, int result ), TP_ARGS(id_priv, event, result), TP_STRUCT__entry( __field(u32, cm_id) __field(u32, tos) __field(unsigned long, event) __field(int, result) __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( __entry->cm_id = id_priv->res.id; __entry->tos = id_priv->tos; __entry->event = event->event; __entry->result = result; memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, sizeof(struct sockaddr_in6)); memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, sizeof(struct sockaddr_in6)); ), TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s consumer returns %d", __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos, rdma_show_cm_event(__entry->event), __entry->result ) ); DECLARE_EVENT_CLASS(cma_client_class, TP_PROTO( const struct ib_device *device ), TP_ARGS(device), TP_STRUCT__entry( __string(name, device->name) ), TP_fast_assign( __assign_str(name); ), TP_printk("device name=%s", __get_str(name) ) ); #define DEFINE_CMA_CLIENT_EVENT(name) \ DEFINE_EVENT(cma_client_class, cm_##name, \ TP_PROTO( \ const struct ib_device *device \ ), \ TP_ARGS(device)) DEFINE_CMA_CLIENT_EVENT(add_one); DEFINE_CMA_CLIENT_EVENT(remove_one); #endif /* _TRACE_RDMA_CMA_H */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE cma_trace #include <trace/define_trace.h>
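/* Each DEFINE_CMA_FSM_EVENT(name) above expands into a tracepoint callable as
 * trace_cm_<name>(). A minimal sketch of a call site follows; the surrounding
 * function is illustrative and assumes the rdma_cm private headers defining
 * struct rdma_id_private are already included. Exactly one .c file must define
 * CREATE_TRACE_POINTS before including this header so the tracepoint bodies
 * are emitted. */
#define CREATE_TRACE_POINTS
#include "cma_trace.h"

static void example_send_rtu(struct rdma_id_private *id_priv)
{
	/* With tracing disabled this is a static-key-guarded no-op; when the
	 * event is enabled it records cm.id, tos, and both socket addresses. */
	trace_cm_send_rtu(id_priv);
}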
42 42 11 11 71 72 42 72 72 15 1305 1303 1305 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 // SPDX-License-Identifier: GPL-2.0-only /* Page fragment allocator * * Page Fragment: * An arbitrary-length arbitrary-offset area of memory which resides within a * 0 or higher order page. Multiple fragments within that page are * individually refcounted, in the page's reference counter. * * The page_frag functions provide a simple allocation framework for page * fragments. This is used by the network stack and network device drivers to * provide a backing region of memory for use as either an sk_buff->head, or to * be used in the "frags" portion of skb_shared_info. */ #include <linux/build_bug.h> #include <linux/export.h> #include <linux/gfp_types.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/page_frag_cache.h> #include "internal.h" static unsigned long encoded_page_create(struct page *page, unsigned int order, bool pfmemalloc) { BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK); BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE); return (unsigned long)page_address(page) | (order & PAGE_FRAG_CACHE_ORDER_MASK) | ((unsigned long)pfmemalloc * PAGE_FRAG_CACHE_PFMEMALLOC_BIT); } static unsigned long encoded_page_decode_order(unsigned long encoded_page) { return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK; } static void *encoded_page_decode_virt(unsigned long encoded_page) { return (void *)(encoded_page & PAGE_MASK); } static struct page *encoded_page_decode_page(unsigned long encoded_page) { return virt_to_page((void *)encoded_page); } static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, gfp_t gfp_mask) { unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER; struct page *page = NULL; gfp_t gfp = gfp_mask; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC; page = __alloc_pages(gfp_mask, PAGE_FRAG_CACHE_MAX_ORDER, numa_mem_id(), NULL); #endif if (unlikely(!page)) { page = __alloc_pages(gfp, 0, numa_mem_id(), NULL); order = 0; } nc->encoded_page = page ? 
encoded_page_create(page, order, page_is_pfmemalloc(page)) : 0; return page; } void page_frag_cache_drain(struct page_frag_cache *nc) { if (!nc->encoded_page) return; __page_frag_cache_drain(encoded_page_decode_page(nc->encoded_page), nc->pagecnt_bias); nc->encoded_page = 0; } EXPORT_SYMBOL(page_frag_cache_drain); void __page_frag_cache_drain(struct page *page, unsigned int count) { VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); if (page_ref_sub_and_test(page, count)) free_frozen_pages(page, compound_order(page)); } EXPORT_SYMBOL(__page_frag_cache_drain); void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask, unsigned int align_mask) { unsigned long encoded_page = nc->encoded_page; unsigned int size, offset; struct page *page; if (unlikely(!encoded_page)) { refill: page = __page_frag_cache_refill(nc, gfp_mask); if (!page) return NULL; encoded_page = nc->encoded_page; /* Even if we own the page, we do not use atomic_set(). * This would break get_page_unless_zero() users. */ page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); /* reset page count bias and offset to start of new frag */ nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; nc->offset = 0; } size = PAGE_SIZE << encoded_page_decode_order(encoded_page); offset = __ALIGN_KERNEL_MASK(nc->offset, ~align_mask); if (unlikely(offset + fragsz > size)) { if (unlikely(fragsz > PAGE_SIZE)) { /* * The caller is trying to allocate a fragment * with fragsz > PAGE_SIZE but the cache isn't big * enough to satisfy the request, this may * happen in low memory conditions. * We don't release the cache page because * it could make memory pressure worse * so we simply return NULL here. */ return NULL; } page = encoded_page_decode_page(encoded_page); if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) goto refill; if (unlikely(encoded_page_decode_pfmemalloc(encoded_page))) { free_frozen_pages(page, encoded_page_decode_order(encoded_page)); goto refill; } /* OK, page count is 0, we can safely set it */ set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); /* reset page count bias and offset to start of new frag */ nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; offset = 0; } nc->pagecnt_bias--; nc->offset = offset + fragsz; return encoded_page_decode_virt(encoded_page) + offset; } EXPORT_SYMBOL(__page_frag_alloc_align); /* * Frees a page fragment allocated out of either a compound or order 0 page. */ void page_frag_free(void *addr) { struct page *page = virt_to_head_page(addr); if (unlikely(put_page_testzero(page))) free_frozen_pages(page, compound_order(page)); } EXPORT_SYMBOL(page_frag_free);
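/* A hedged sketch of how a consumer (typically a network driver) uses the page
 * fragment allocator above: keep a struct page_frag_cache, carve small
 * fragments out of it, free each fragment with page_frag_free(), and drain the
 * cache on teardown. Only __page_frag_alloc_align() and the two free/drain
 * helpers shown in this file are used; the wrapper functions and the static
 * cache here are illustrative assumptions. */
#include <linux/page_frag_cache.h>
#include <linux/gfp.h>

/* Zero-initialised cache starts empty (encoded_page == 0), matching the
 * refill check in __page_frag_alloc_align(). A real driver would keep one
 * cache per ring or per CPU. */
static struct page_frag_cache example_cache;

static void *example_alloc_frag(unsigned int len)
{
	/* ~0u as align_mask means "no extra alignment", per
	 * __ALIGN_KERNEL_MASK(nc->offset, ~align_mask) above. */
	return __page_frag_alloc_align(&example_cache, len, GFP_ATOMIC, ~0u);
}

static void example_free_frag(void *buf)
{
	page_frag_free(buf);		/* drops one reference on the backing page */
}

static void example_teardown(void)
{
	page_frag_cache_drain(&example_cache);	/* release the remaining bias refs */
}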
8 21 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /* * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #ifndef CURVE25519_H #define CURVE25519_H #include <crypto/algapi.h> // For crypto_memneq. #include <linux/types.h> #include <linux/random.h> enum curve25519_lengths { CURVE25519_KEY_SIZE = 32 }; extern const u8 curve25519_null_point[]; extern const u8 curve25519_base_point[]; void curve25519_generic(u8 out[CURVE25519_KEY_SIZE], const u8 scalar[CURVE25519_KEY_SIZE], const u8 point[CURVE25519_KEY_SIZE]); void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], const u8 scalar[CURVE25519_KEY_SIZE], const u8 point[CURVE25519_KEY_SIZE]); void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]); bool curve25519_selftest(void); static inline bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE], const u8 basepoint[CURVE25519_KEY_SIZE]) { if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) curve25519_arch(mypublic, secret, basepoint); else curve25519_generic(mypublic, secret, basepoint); return crypto_memneq(mypublic, curve25519_null_point, CURVE25519_KEY_SIZE); } static inline bool __must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]) { if (unlikely(!crypto_memneq(secret, curve25519_null_point, CURVE25519_KEY_SIZE))) return false; if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) curve25519_base_arch(pub, secret); else curve25519_generic(pub, secret, curve25519_base_point); return crypto_memneq(pub, curve25519_null_point, CURVE25519_KEY_SIZE); } static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE]) { secret[0] &= 248; secret[31] = (secret[31] & 127) | 64; } static inline void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE]) { get_random_bytes_wait(secret, CURVE25519_KEY_SIZE); curve25519_clamp_secret(secret); } #endif /* CURVE25519_H */
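/* The Curve25519 header above is self-contained enough to show the intended
 * key-agreement flow. A minimal sketch using only the helpers it declares;
 * the function name and error code choice are illustrative. */
#include <crypto/curve25519.h>
#include <linux/errno.h>

static int example_x25519_handshake(void)
{
	u8 a_secret[CURVE25519_KEY_SIZE], a_public[CURVE25519_KEY_SIZE];
	u8 b_secret[CURVE25519_KEY_SIZE], b_public[CURVE25519_KEY_SIZE];
	u8 shared_a[CURVE25519_KEY_SIZE], shared_b[CURVE25519_KEY_SIZE];

	/* Random, clamped private keys. */
	curve25519_generate_secret(a_secret);
	curve25519_generate_secret(b_secret);

	/* Public keys; both helpers return false on an all-zero result. */
	if (!curve25519_generate_public(a_public, a_secret) ||
	    !curve25519_generate_public(b_public, b_secret))
		return -EINVAL;

	/* Each side combines its own secret with the peer's public key;
	 * shared_a and shared_b end up identical. */
	if (!curve25519(shared_a, a_secret, b_public) ||
	    !curve25519(shared_b, b_secret, a_public))
		return -EINVAL;

	return 0;
}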
3 3 2 1 1 5 2 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_offload.h> #include <net/netfilter/nf_dup_netdev.h> #include <net/neighbour.h> #include <net/ip.h> struct nft_fwd_netdev { u8 sreg_dev; }; static void nft_fwd_netdev_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_fwd_netdev *priv = nft_expr_priv(expr); int oif = regs->data[priv->sreg_dev]; struct sk_buff *skb = pkt->skb; /* This is used by ifb only. 
*/ skb->skb_iif = skb->dev->ifindex; skb_set_redirected(skb, nft_hook(pkt) == NF_NETDEV_INGRESS); nf_fwd_netdev_egress(pkt, oif); regs->verdict.code = NF_STOLEN; } static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = { [NFTA_FWD_SREG_DEV] = { .type = NLA_U32 }, [NFTA_FWD_SREG_ADDR] = { .type = NLA_U32 }, [NFTA_FWD_NFPROTO] = NLA_POLICY_MAX(NLA_BE32, 255), }; static int nft_fwd_netdev_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_fwd_netdev *priv = nft_expr_priv(expr); if (tb[NFTA_FWD_SREG_DEV] == NULL) return -EINVAL; return nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, sizeof(int)); } static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { struct nft_fwd_netdev *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_FWD_SREG_DEV, priv->sreg_dev)) goto nla_put_failure; return 0; nla_put_failure: return -1; } static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr) { const struct nft_fwd_netdev *priv = nft_expr_priv(expr); int oif = ctx->regs[priv->sreg_dev].data.data[0]; return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif); } static bool nft_fwd_netdev_offload_action(const struct nft_expr *expr) { return true; } struct nft_fwd_neigh { u8 sreg_dev; u8 sreg_addr; u8 nfproto; }; static void nft_fwd_neigh_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_fwd_neigh *priv = nft_expr_priv(expr); void *addr = &regs->data[priv->sreg_addr]; int oif = regs->data[priv->sreg_dev]; unsigned int verdict = NF_STOLEN; struct sk_buff *skb = pkt->skb; struct net_device *dev; int neigh_table; switch (priv->nfproto) { case NFPROTO_IPV4: { struct iphdr *iph; if (skb->protocol != htons(ETH_P_IP)) { verdict = NFT_BREAK; goto out; } if (skb_try_make_writable(skb, sizeof(*iph))) { verdict = NF_DROP; goto out; } iph = ip_hdr(skb); ip_decrease_ttl(iph); neigh_table = NEIGH_ARP_TABLE; break; } case NFPROTO_IPV6: { struct ipv6hdr *ip6h; if (skb->protocol != htons(ETH_P_IPV6)) { verdict = NFT_BREAK; goto out; } if (skb_try_make_writable(skb, sizeof(*ip6h))) { verdict = NF_DROP; goto out; } ip6h = ipv6_hdr(skb); ip6h->hop_limit--; neigh_table = NEIGH_ND_TABLE; break; } default: verdict = NFT_BREAK; goto out; } dev = dev_get_by_index_rcu(nft_net(pkt), oif); if (dev == NULL) return; skb->dev = dev; skb_clear_tstamp(skb); neigh_xmit(neigh_table, dev, addr, skb); out: regs->verdict.code = verdict; } static int nft_fwd_neigh_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_fwd_neigh *priv = nft_expr_priv(expr); unsigned int addr_len; int err; if (!tb[NFTA_FWD_SREG_DEV] || !tb[NFTA_FWD_SREG_ADDR] || !tb[NFTA_FWD_NFPROTO]) return -EINVAL; priv->nfproto = ntohl(nla_get_be32(tb[NFTA_FWD_NFPROTO])); switch (priv->nfproto) { case NFPROTO_IPV4: addr_len = sizeof(struct in_addr); break; case NFPROTO_IPV6: addr_len = sizeof(struct in6_addr); break; default: return -EOPNOTSUPP; } err = nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, sizeof(int)); if (err < 0) return err; return nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr, addr_len); } static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { struct nft_fwd_neigh *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_FWD_SREG_DEV, priv->sreg_dev) || 
nft_dump_register(skb, NFTA_FWD_SREG_ADDR, priv->sreg_addr) || nla_put_be32(skb, NFTA_FWD_NFPROTO, htonl(priv->nfproto))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static int nft_fwd_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS) | (1 << NF_NETDEV_EGRESS)); } static struct nft_expr_type nft_fwd_netdev_type; static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = { .type = &nft_fwd_netdev_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_neigh)), .eval = nft_fwd_neigh_eval, .init = nft_fwd_neigh_init, .dump = nft_fwd_neigh_dump, .validate = nft_fwd_validate, .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_fwd_netdev_ops = { .type = &nft_fwd_netdev_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_netdev)), .eval = nft_fwd_netdev_eval, .init = nft_fwd_netdev_init, .dump = nft_fwd_netdev_dump, .validate = nft_fwd_validate, .reduce = NFT_REDUCE_READONLY, .offload = nft_fwd_netdev_offload, .offload_action = nft_fwd_netdev_offload_action, }; static const struct nft_expr_ops * nft_fwd_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { if (tb[NFTA_FWD_SREG_ADDR]) return &nft_fwd_neigh_netdev_ops; if (tb[NFTA_FWD_SREG_DEV]) return &nft_fwd_netdev_ops; return ERR_PTR(-EOPNOTSUPP); } static struct nft_expr_type nft_fwd_netdev_type __read_mostly = { .family = NFPROTO_NETDEV, .name = "fwd", .select_ops = nft_fwd_select_ops, .policy = nft_fwd_netdev_policy, .maxattr = NFTA_FWD_MAX, .owner = THIS_MODULE, }; static int __init nft_fwd_netdev_module_init(void) { return nft_register_expr(&nft_fwd_netdev_type); } static void __exit nft_fwd_netdev_module_exit(void) { nft_unregister_expr(&nft_fwd_netdev_type); } module_init(nft_fwd_netdev_module_init); module_exit(nft_fwd_netdev_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("nftables netdev packet forwarding support"); MODULE_ALIAS_NFT_AF_EXPR(5, "fwd");
28 1744 1633 121 41 1594 1 118 2124 1749 1753 1116 1700 1175 1 1171 1169 269 3 39 1170 39 1169 39 1360 966 1 688 689 688 688 1618 1590 93 78 28 92 82 57 45 727 1909 1910 106 1624 33 1992 1 1272 1989 46 405 30 530 59 3 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 #ifndef IOU_CORE_H #define IOU_CORE_H #include <linux/errno.h> #include <linux/lockdep.h> #include <linux/resume_user_mode.h> #include <linux/kasan.h> #include <linux/poll.h> #include <linux/io_uring_types.h> #include <uapi/linux/eventpoll.h> #include "alloc_cache.h" #include "io-wq.h" #include "slist.h" #include "filetable.h" #include "opdef.h" #ifndef CREATE_TRACE_POINTS #include <trace/events/io_uring.h> #endif enum { IOU_COMPLETE = 0, IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED, /* * The request has more work to do and should be retried. io_uring will * attempt to wait on the file for eligible opcodes, but otherwise * it'll be handed to iowq for blocking execution. It works for normal * requests as well as for the multi shot mode. */ IOU_RETRY = -EAGAIN, /* * Requeue the task_work to restart operations on this request. The * actual value isn't important, should just be not an otherwise * valid error code, yet less than -MAX_ERRNO and valid internally. 
*/ IOU_REQUEUE = -3072, }; struct io_wait_queue { struct wait_queue_entry wq; struct io_ring_ctx *ctx; unsigned cq_tail; unsigned cq_min_tail; unsigned nr_timeouts; int hit_timeout; ktime_t min_timeout; ktime_t timeout; struct hrtimer t; #ifdef CONFIG_NET_RX_BUSY_POLL ktime_t napi_busy_poll_dt; bool napi_prefer_busy_poll; #endif }; static inline bool io_should_wake(struct io_wait_queue *iowq) { struct io_ring_ctx *ctx = iowq->ctx; int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail; /* * Wake up if we have enough events, or if a timeout occurred since we * started waiting. For timeouts, we always want to return to userspace, * regardless of event count. */ return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; } #define IORING_MAX_ENTRIES 32768 #define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES) unsigned long rings_size(unsigned int flags, unsigned int sq_entries, unsigned int cq_entries, size_t *sq_offset); int io_uring_fill_params(unsigned entries, struct io_uring_params *p); bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow); int io_run_task_work_sig(struct io_ring_ctx *ctx); void io_req_defer_failed(struct io_kiocb *req, s32 res); bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags); void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags); bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags); void __io_commit_cqring_flush(struct io_ring_ctx *ctx); void io_req_track_inflight(struct io_kiocb *req); struct file *io_file_get_normal(struct io_kiocb *req, int fd); struct file *io_file_get_fixed(struct io_kiocb *req, int fd, unsigned issue_flags); void __io_req_task_work_add(struct io_kiocb *req, unsigned flags); void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags); void io_req_task_queue(struct io_kiocb *req); void io_req_task_complete(struct io_kiocb *req, io_tw_token_t tw); void io_req_task_queue_fail(struct io_kiocb *req, int ret); void io_req_task_submit(struct io_kiocb *req, io_tw_token_t tw); struct llist_node *io_handle_tw_list(struct llist_node *node, unsigned int *count, unsigned int max_entries); struct llist_node *tctx_task_work_run(struct io_uring_task *tctx, unsigned int max_entries, unsigned int *count); void tctx_task_work(struct callback_head *cb); __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file, int start, int end); void io_req_queue_iowq(struct io_kiocb *req); int io_poll_issue(struct io_kiocb *req, io_tw_token_t tw); int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr); int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin); void __io_submit_flush_completions(struct io_ring_ctx *ctx); struct io_wq_work *io_wq_free_work(struct io_wq_work *work); void io_wq_submit_work(struct io_wq_work *work); void io_free_req(struct io_kiocb *req); void io_queue_next(struct io_kiocb *req); void io_task_refs_refill(struct io_uring_task *tctx); bool __io_alloc_req_refill(struct io_ring_ctx *ctx); bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx, bool cancel_all); void io_activate_pollwq(struct io_ring_ctx *ctx); static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) { #if defined(CONFIG_PROVE_LOCKING) lockdep_assert(in_task()); if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) lockdep_assert_held(&ctx->uring_lock); if (ctx->flags & IORING_SETUP_IOPOLL) { lockdep_assert_held(&ctx->uring_lock); } else if 
(!ctx->task_complete) { lockdep_assert_held(&ctx->completion_lock); } else if (ctx->submitter_task) { /* * ->submitter_task may be NULL and we can still post a CQE, * if the ring has been setup with IORING_SETUP_R_DISABLED. * Not from an SQE, as those cannot be submitted, but via * updating tagged resources. */ if (!percpu_ref_is_dying(&ctx->refs)) lockdep_assert(current == ctx->submitter_task); } #endif } static inline bool io_is_compat(struct io_ring_ctx *ctx) { return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->compat); } static inline void io_req_task_work_add(struct io_kiocb *req) { __io_req_task_work_add(req, 0); } static inline void io_submit_flush_completions(struct io_ring_ctx *ctx) { if (!wq_list_empty(&ctx->submit_state.compl_reqs) || ctx->submit_state.cq_flush) __io_submit_flush_completions(ctx); } #define io_for_each_link(pos, head) \ for (pos = (head); pos; pos = pos->link) static inline bool io_get_cqe_overflow(struct io_ring_ctx *ctx, struct io_uring_cqe **ret, bool overflow) { io_lockdep_assert_cq_locked(ctx); if (unlikely(ctx->cqe_cached >= ctx->cqe_sentinel)) { if (unlikely(!io_cqe_cache_refill(ctx, overflow))) return false; } *ret = ctx->cqe_cached; ctx->cached_cq_tail++; ctx->cqe_cached++; if (ctx->flags & IORING_SETUP_CQE32) ctx->cqe_cached++; return true; } static inline bool io_get_cqe(struct io_ring_ctx *ctx, struct io_uring_cqe **ret) { return io_get_cqe_overflow(ctx, ret, false); } static inline bool io_defer_get_uncommited_cqe(struct io_ring_ctx *ctx, struct io_uring_cqe **cqe_ret) { io_lockdep_assert_cq_locked(ctx); ctx->submit_state.cq_flush = true; return io_get_cqe(ctx, cqe_ret); } static __always_inline bool io_fill_cqe_req(struct io_ring_ctx *ctx, struct io_kiocb *req) { struct io_uring_cqe *cqe; /* * If we can't get a cq entry, userspace overflowed the * submission (by quite a lot). Increment the overflow count in * the ring. 
*/ if (unlikely(!io_get_cqe(ctx, &cqe))) return false; memcpy(cqe, &req->cqe, sizeof(*cqe)); if (ctx->flags & IORING_SETUP_CQE32) { memcpy(cqe->big_cqe, &req->big_cqe, sizeof(*cqe)); memset(&req->big_cqe, 0, sizeof(req->big_cqe)); } if (trace_io_uring_complete_enabled()) trace_io_uring_complete(req->ctx, req, cqe); return true; } static inline void req_set_fail(struct io_kiocb *req) { req->flags |= REQ_F_FAIL; if (req->flags & REQ_F_CQE_SKIP) { req->flags &= ~REQ_F_CQE_SKIP; req->flags |= REQ_F_SKIP_LINK_CQES; } } static inline void io_req_set_res(struct io_kiocb *req, s32 res, u32 cflags) { req->cqe.res = res; req->cqe.flags = cflags; } static inline void *io_uring_alloc_async_data(struct io_alloc_cache *cache, struct io_kiocb *req) { if (cache) { req->async_data = io_cache_alloc(cache, GFP_KERNEL); } else { const struct io_issue_def *def = &io_issue_defs[req->opcode]; WARN_ON_ONCE(!def->async_size); req->async_data = kmalloc(def->async_size, GFP_KERNEL); } if (req->async_data) req->flags |= REQ_F_ASYNC_DATA; return req->async_data; } static inline bool req_has_async_data(struct io_kiocb *req) { return req->flags & REQ_F_ASYNC_DATA; } static inline void io_put_file(struct io_kiocb *req) { if (!(req->flags & REQ_F_FIXED_FILE) && req->file) fput(req->file); } static inline void io_ring_submit_unlock(struct io_ring_ctx *ctx, unsigned issue_flags) { lockdep_assert_held(&ctx->uring_lock); if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) mutex_unlock(&ctx->uring_lock); } static inline void io_ring_submit_lock(struct io_ring_ctx *ctx, unsigned issue_flags) { /* * "Normal" inline submissions always hold the uring_lock, since we * grab it from the system call. Same is true for the SQPOLL offload. * The only exception is when we've detached the request and issue it * from an async worker thread, grab the lock for that case. */ if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) mutex_lock(&ctx->uring_lock); lockdep_assert_held(&ctx->uring_lock); } static inline void io_commit_cqring(struct io_ring_ctx *ctx) { /* order cqe stores with ring update */ smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail); } static inline void io_poll_wq_wake(struct io_ring_ctx *ctx) { if (wq_has_sleeper(&ctx->poll_wq)) __wake_up(&ctx->poll_wq, TASK_NORMAL, 0, poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); } static inline void io_cqring_wake(struct io_ring_ctx *ctx) { /* * Trigger waitqueue handler on all waiters on our waitqueue. This * won't necessarily wake up all the tasks, io_should_wake() will make * that decision. * * Pass in EPOLLIN|EPOLL_URING_WAKE as the poll wakeup key. The latter * set in the mask so that if we recurse back into our own poll * waitqueue handlers, we know we have a dependency between eventfd or * epoll and should terminate multishot poll at that point. */ if (wq_has_sleeper(&ctx->cq_wait)) __wake_up(&ctx->cq_wait, TASK_NORMAL, 0, poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); } static inline bool io_sqring_full(struct io_ring_ctx *ctx) { struct io_rings *r = ctx->rings; /* * SQPOLL must use the actual sqring head, as using the cached_sq_head * is race prone if the SQPOLL thread has grabbed entries but not yet * committed them to the ring. For !SQPOLL, this doesn't matter, but * since this helper is just used for SQPOLL sqring waits (or POLLOUT), * just read the actual sqring head unconditionally. 
*/ return READ_ONCE(r->sq.tail) - READ_ONCE(r->sq.head) == ctx->sq_entries; } static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) { struct io_rings *rings = ctx->rings; unsigned int entries; /* make sure SQ entry isn't read before tail */ entries = smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head; return min(entries, ctx->sq_entries); } static inline int io_run_task_work(void) { bool ret = false; /* * Always check-and-clear the task_work notification signal. With how * signaling works for task_work, we can find it set with nothing to * run. We need to clear it for that case, like get_signal() does. */ if (test_thread_flag(TIF_NOTIFY_SIGNAL)) clear_notify_signal(); /* * PF_IO_WORKER never returns to userspace, so check here if we have * notify work that needs processing. */ if (current->flags & PF_IO_WORKER) { if (test_thread_flag(TIF_NOTIFY_RESUME)) { __set_current_state(TASK_RUNNING); resume_user_mode_work(NULL); } if (current->io_uring) { unsigned int count = 0; __set_current_state(TASK_RUNNING); tctx_task_work_run(current->io_uring, UINT_MAX, &count); if (count) ret = true; } } if (task_work_pending(current)) { __set_current_state(TASK_RUNNING); task_work_run(); ret = true; } return ret; } static inline bool io_local_work_pending(struct io_ring_ctx *ctx) { return !llist_empty(&ctx->work_llist) || !llist_empty(&ctx->retry_llist); } static inline bool io_task_work_pending(struct io_ring_ctx *ctx) { return task_work_pending(current) || io_local_work_pending(ctx); } static inline void io_tw_lock(struct io_ring_ctx *ctx, io_tw_token_t tw) { lockdep_assert_held(&ctx->uring_lock); } /* * Don't complete immediately but use deferred completion infrastructure. * Protected by ->uring_lock and can only be used either with * IO_URING_F_COMPLETE_DEFER or inside a tw handler holding the mutex. */ static inline void io_req_complete_defer(struct io_kiocb *req) __must_hold(&req->ctx->uring_lock) { struct io_submit_state *state = &req->ctx->submit_state; lockdep_assert_held(&req->ctx->uring_lock); wq_list_add_tail(&req->comp_list, &state->compl_reqs); } static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx) { if (unlikely(ctx->off_timeout_used || ctx->has_evfd || ctx->poll_activated)) __io_commit_cqring_flush(ctx); } static inline void io_get_task_refs(int nr) { struct io_uring_task *tctx = current->io_uring; tctx->cached_refs -= nr; if (unlikely(tctx->cached_refs < 0)) io_task_refs_refill(tctx); } static inline bool io_req_cache_empty(struct io_ring_ctx *ctx) { return !ctx->submit_state.free_list.next; } extern struct kmem_cache *req_cachep; static inline struct io_kiocb *io_extract_req(struct io_ring_ctx *ctx) { struct io_kiocb *req; req = container_of(ctx->submit_state.free_list.next, struct io_kiocb, comp_list); wq_stack_extract(&ctx->submit_state.free_list); return req; } static inline bool io_alloc_req(struct io_ring_ctx *ctx, struct io_kiocb **req) { if (unlikely(io_req_cache_empty(ctx))) { if (!__io_alloc_req_refill(ctx)) return false; } *req = io_extract_req(ctx); return true; } static inline bool io_allowed_defer_tw_run(struct io_ring_ctx *ctx) { return likely(ctx->submitter_task == current); } static inline bool io_allowed_run_tw(struct io_ring_ctx *ctx) { return likely(!(ctx->flags & IORING_SETUP_DEFER_TASKRUN) || ctx->submitter_task == current); } /* * Terminate the request if either of these conditions are true: * * 1) It's being executed by the original task, but that task is marked * with PF_EXITING as it's exiting. 
* 2) PF_KTHREAD is set, in which case the invoker of the task_work is * our fallback task_work. */ static inline bool io_should_terminate_tw(void) { return current->flags & (PF_KTHREAD | PF_EXITING); } static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res) { io_req_set_res(req, res, 0); req->io_task_work.func = io_req_task_complete; io_req_task_work_add(req); } /* * IORING_SETUP_SQE128 contexts allocate twice the normal SQE size for each * slot. */ static inline size_t uring_sqe_size(struct io_ring_ctx *ctx) { if (ctx->flags & IORING_SETUP_SQE128) return 2 * sizeof(struct io_uring_sqe); return sizeof(struct io_uring_sqe); } static inline bool io_file_can_poll(struct io_kiocb *req) { if (req->flags & REQ_F_CAN_POLL) return true; if (req->file && file_can_poll(req->file)) { req->flags |= REQ_F_CAN_POLL; return true; } return false; } static inline ktime_t io_get_time(struct io_ring_ctx *ctx) { if (ctx->clockid == CLOCK_MONOTONIC) return ktime_get(); return ktime_get_with_offset(ctx->clock_offset); } enum { IO_CHECK_CQ_OVERFLOW_BIT, IO_CHECK_CQ_DROPPED_BIT, }; static inline bool io_has_work(struct io_ring_ctx *ctx) { return test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq) || io_local_work_pending(ctx); } #endif
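/* One subtlety in io_should_wake() above is the signed-distance computation:
 * the CQ ring tail and the waiter's target tail are free-running 32-bit
 * counters, so their difference is taken as an int to stay correct across
 * wraparound. A standalone illustration of that idiom with made-up values;
 * the function and variable names are not from io_uring. */
#include <linux/printk.h>

static void example_cq_wait_check(void)
{
	unsigned int target_tail = 0xfffffffeu;	/* iowq->cq_tail analogue */
	unsigned int ring_tail = 0x00000001u;	/* cq.tail after wrapping */
	int dist = ring_tail - (int)target_tail;	/* 3: target already passed */

	pr_info("dist=%d -> %s\n", dist, dist >= 0 ? "wake" : "keep waiting");
}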
200 200 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 // SPDX-License-Identifier: GPL-2.0 #include <linux/sysctl.h> #include <net/lwtunnel.h> #include <net/netfilter/nf_hooks_lwtunnel.h> #include <linux/netfilter.h> #include "nf_internals.h" static inline int nf_hooks_lwtunnel_get(void) { if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return 1; else return 0; } static inline int nf_hooks_lwtunnel_set(int enable) { if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) { if (!enable) return -EBUSY; } else if (enable) { static_branch_enable(&nf_hooks_lwtunnel_enabled); } return 0; } #ifdef CONFIG_SYSCTL int nf_hooks_lwtunnel_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int proc_nf_hooks_lwtunnel_enabled = 0; struct ctl_table tmp = { .procname = table->procname, .data = &proc_nf_hooks_lwtunnel_enabled, .maxlen = sizeof(int), .mode = table->mode, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }; int ret; if (!write) proc_nf_hooks_lwtunnel_enabled = nf_hooks_lwtunnel_get(); ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) ret = nf_hooks_lwtunnel_set(proc_nf_hooks_lwtunnel_enabled); return ret; } EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler); static struct ctl_table nf_lwtunnel_sysctl_table[] = { { .procname = "nf_hooks_lwtunnel", .data = NULL, .maxlen = sizeof(int), .mode = 0644, .proc_handler = nf_hooks_lwtunnel_sysctl_handler, }, }; static int __net_init nf_lwtunnel_net_init(struct net *net) { struct ctl_table_header *hdr; struct ctl_table *table; table = nf_lwtunnel_sysctl_table; if (!net_eq(net, &init_net)) { table = kmemdup(nf_lwtunnel_sysctl_table, sizeof(nf_lwtunnel_sysctl_table), GFP_KERNEL); if (!table) goto err_alloc; } hdr = register_net_sysctl_sz(net, "net/netfilter", table, ARRAY_SIZE(nf_lwtunnel_sysctl_table)); if (!hdr) goto err_reg; net->nf.nf_lwtnl_dir_header = hdr; return 0; err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } static void __net_exit nf_lwtunnel_net_exit(struct net *net) { const struct ctl_table *table; table = net->nf.nf_lwtnl_dir_header->ctl_table_arg; unregister_net_sysctl_table(net->nf.nf_lwtnl_dir_header); if (!net_eq(net, &init_net)) kfree(table); } static struct pernet_operations nf_lwtunnel_net_ops = { .init = nf_lwtunnel_net_init, .exit = nf_lwtunnel_net_exit, }; int __init netfilter_lwtunnel_init(void) { return register_pernet_subsys(&nf_lwtunnel_net_ops); } void netfilter_lwtunnel_fini(void) { unregister_pernet_subsys(&nf_lwtunnel_net_ops); } #else int __init netfilter_lwtunnel_init(void) { return 0; } void netfilter_lwtunnel_fini(void) {} #endif /* CONFIG_SYSCTL */
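/* The sysctl above is deliberately one-way: nf_hooks_lwtunnel_set() enables a
 * static branch but refuses to clear it, returning -EBUSY once it is on. A
 * tiny self-contained sketch of the same static-key pattern; the key name and
 * functions here are illustrative, not the ones declared for nf_hooks_lwtunnel. */
#include <linux/jump_label.h>
#include <linux/errno.h>

DEFINE_STATIC_KEY_FALSE(example_feature_enabled);

static int example_feature_set(int enable)
{
	if (static_branch_unlikely(&example_feature_enabled)) {
		if (!enable)
			return -EBUSY;	/* already on; cannot be turned back off */
	} else if (enable) {
		static_branch_enable(&example_feature_enabled);
	}
	return 0;
}

static bool example_feature_check(void)
{
	/* Compiles to a patched jump: near-zero cost while the key is off. */
	return static_branch_unlikely(&example_feature_enabled);
}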
/* * Copyright (c) 2010-2011 Atheros Communications Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies.
* * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include <linux/unaligned.h> #include "htc.h" MODULE_FIRMWARE(HTC_7010_MODULE_FW); MODULE_FIRMWARE(HTC_9271_MODULE_FW); static const struct usb_device_id ath9k_hif_usb_ids[] = { { USB_DEVICE(0x0cf3, 0x9271) }, /* Atheros */ { USB_DEVICE(0x0cf3, 0x1006) }, /* Atheros */ { USB_DEVICE(0x0846, 0x9030) }, /* Netgear N150 */ { USB_DEVICE(0x07b8, 0x9271) }, /* Altai WA1011N-GU */ { USB_DEVICE(0x07D1, 0x3A10) }, /* Dlink Wireless 150 */ { USB_DEVICE(0x13D3, 0x3327) }, /* Azurewave */ { USB_DEVICE(0x13D3, 0x3328) }, /* Azurewave */ { USB_DEVICE(0x13D3, 0x3346) }, /* IMC Networks */ { USB_DEVICE(0x13D3, 0x3348) }, /* Azurewave */ { USB_DEVICE(0x13D3, 0x3349) }, /* Azurewave */ { USB_DEVICE(0x13D3, 0x3350) }, /* Azurewave */ { USB_DEVICE(0x04CA, 0x4605) }, /* Liteon */ { USB_DEVICE(0x040D, 0x3801) }, /* VIA */ { USB_DEVICE(0x0cf3, 0xb003) }, /* Ubiquiti WifiStation Ext */ { USB_DEVICE(0x0cf3, 0xb002) }, /* Ubiquiti WifiStation */ { USB_DEVICE(0x057c, 0x8403) }, /* AVM FRITZ!WLAN 11N v2 USB */ { USB_DEVICE(0x0471, 0x209e) }, /* Philips (or NXP) PTA01 */ { USB_DEVICE(0x1eda, 0x2315) }, /* AirTies */ { USB_DEVICE(0x0cf3, 0x7015), .driver_info = AR9287_USB }, /* Atheros */ { USB_DEVICE(0x0cf3, 0x7010), .driver_info = AR9280_USB }, /* Atheros */ { USB_DEVICE(0x0846, 0x9018), .driver_info = AR9280_USB }, /* Netgear WNDA3200 */ { USB_DEVICE(0x083A, 0xA704), .driver_info = AR9280_USB }, /* SMC Networks */ { USB_DEVICE(0x0411, 0x017f), .driver_info = AR9280_USB }, /* Sony UWA-BR100 */ { USB_DEVICE(0x0411, 0x0197), .driver_info = AR9280_USB }, /* Buffalo WLI-UV-AG300P */ { USB_DEVICE(0x04da, 0x3904), .driver_info = AR9280_USB }, { USB_DEVICE(0x0930, 0x0a08), .driver_info = AR9280_USB }, /* Toshiba WLM-20U2 and GN-1080 */ { USB_DEVICE(0x0cf3, 0x20ff), .driver_info = STORAGE_DEVICE }, { }, }; MODULE_DEVICE_TABLE(usb, ath9k_hif_usb_ids); static int __hif_usb_tx(struct hif_device_usb *hif_dev); static void hif_usb_regout_cb(struct urb *urb) { struct cmd_buf *cmd = urb->context; switch (urb->status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: goto free; default: break; } if (cmd) { ath9k_htc_txcompletion_cb(cmd->hif_dev->htc_handle, cmd->skb, true); kfree(cmd); } return; free: kfree_skb(cmd->skb); kfree(cmd); } static int hif_usb_send_regout(struct hif_device_usb *hif_dev, struct sk_buff *skb) { struct urb *urb; struct cmd_buf *cmd; int ret = 0; urb = usb_alloc_urb(0, GFP_KERNEL); if (urb == NULL) return -ENOMEM; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (cmd == NULL) { usb_free_urb(urb); return -ENOMEM; } cmd->skb = skb; cmd->hif_dev = hif_dev; usb_fill_int_urb(urb, hif_dev->udev, usb_sndintpipe(hif_dev->udev, USB_REG_OUT_PIPE), skb->data, skb->len, hif_usb_regout_cb, cmd, 1); usb_anchor_urb(urb, &hif_dev->regout_submitted); ret = usb_submit_urb(urb, GFP_KERNEL); if (ret) { usb_unanchor_urb(urb); kfree(cmd); } usb_free_urb(urb); return ret; } static void hif_usb_mgmt_cb(struct urb *urb) { struct cmd_buf *cmd = urb->context; struct hif_device_usb *hif_dev; unsigned long flags; bool 
txok = true; if (!cmd || !cmd->skb || !cmd->hif_dev) return; hif_dev = cmd->hif_dev; switch (urb->status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: txok = false; /* * If the URBs are being flushed, no need to complete * this packet. */ spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); if (hif_dev->tx.flags & HIF_USB_TX_FLUSH) { spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); dev_kfree_skb_any(cmd->skb); kfree(cmd); return; } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); break; default: txok = false; break; } skb_pull(cmd->skb, 4); ath9k_htc_txcompletion_cb(cmd->hif_dev->htc_handle, cmd->skb, txok); kfree(cmd); } static int hif_usb_send_mgmt(struct hif_device_usb *hif_dev, struct sk_buff *skb) { struct urb *urb; struct cmd_buf *cmd; int ret = 0; __le16 *hdr; urb = usb_alloc_urb(0, GFP_ATOMIC); if (urb == NULL) return -ENOMEM; cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); if (cmd == NULL) { usb_free_urb(urb); return -ENOMEM; } cmd->skb = skb; cmd->hif_dev = hif_dev; hdr = skb_push(skb, 4); *hdr++ = cpu_to_le16(skb->len - 4); *hdr++ = cpu_to_le16(ATH_USB_TX_STREAM_MODE_TAG); usb_fill_bulk_urb(urb, hif_dev->udev, usb_sndbulkpipe(hif_dev->udev, USB_WLAN_TX_PIPE), skb->data, skb->len, hif_usb_mgmt_cb, cmd); usb_anchor_urb(urb, &hif_dev->mgmt_submitted); ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret) { usb_unanchor_urb(urb); kfree(cmd); } usb_free_urb(urb); return ret; } static inline void ath9k_skb_queue_purge(struct hif_device_usb *hif_dev, struct sk_buff_head *list) { struct sk_buff *skb; while ((skb = __skb_dequeue(list)) != NULL) { dev_kfree_skb_any(skb); } } static inline void ath9k_skb_queue_complete(struct hif_device_usb *hif_dev, struct sk_buff_head *queue, bool txok) { struct sk_buff *skb; while ((skb = __skb_dequeue(queue)) != NULL) { #ifdef CONFIG_ATH9K_HTC_DEBUGFS int ln = skb->len; #endif ath9k_htc_txcompletion_cb(hif_dev->htc_handle, skb, txok); if (txok) { TX_STAT_INC(hif_dev, skb_success); TX_STAT_ADD(hif_dev, skb_success_bytes, ln); } else TX_STAT_INC(hif_dev, skb_failed); } } static void hif_usb_tx_cb(struct urb *urb) { struct tx_buf *tx_buf = urb->context; struct hif_device_usb *hif_dev; bool txok = true; if (!tx_buf || !tx_buf->hif_dev) return; hif_dev = tx_buf->hif_dev; switch (urb->status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: txok = false; /* * If the URBs are being flushed, no need to add this * URB to the free list. 
*/ spin_lock(&hif_dev->tx.tx_lock); if (hif_dev->tx.flags & HIF_USB_TX_FLUSH) { spin_unlock(&hif_dev->tx.tx_lock); ath9k_skb_queue_purge(hif_dev, &tx_buf->skb_queue); return; } spin_unlock(&hif_dev->tx.tx_lock); break; default: txok = false; break; } ath9k_skb_queue_complete(hif_dev, &tx_buf->skb_queue, txok); /* Re-initialize the SKB queue */ tx_buf->len = tx_buf->offset = 0; __skb_queue_head_init(&tx_buf->skb_queue); /* Add this TX buffer to the free list */ spin_lock(&hif_dev->tx.tx_lock); list_move_tail(&tx_buf->list, &hif_dev->tx.tx_buf); hif_dev->tx.tx_buf_cnt++; if (!(hif_dev->tx.flags & HIF_USB_TX_STOP)) __hif_usb_tx(hif_dev); /* Check for pending SKBs */ TX_STAT_INC(hif_dev, buf_completed); spin_unlock(&hif_dev->tx.tx_lock); } /* TX lock has to be taken */ static int __hif_usb_tx(struct hif_device_usb *hif_dev) { struct tx_buf *tx_buf = NULL; struct sk_buff *nskb = NULL; int ret = 0, i; u16 tx_skb_cnt = 0; u8 *buf; __le16 *hdr; if (hif_dev->tx.tx_skb_cnt == 0) return 0; /* Check if a free TX buffer is available */ if (list_empty(&hif_dev->tx.tx_buf)) return 0; tx_buf = list_first_entry(&hif_dev->tx.tx_buf, struct tx_buf, list); list_move_tail(&tx_buf->list, &hif_dev->tx.tx_pending); hif_dev->tx.tx_buf_cnt--; tx_skb_cnt = min_t(u16, hif_dev->tx.tx_skb_cnt, MAX_TX_AGGR_NUM); for (i = 0; i < tx_skb_cnt; i++) { nskb = __skb_dequeue(&hif_dev->tx.tx_skb_queue); /* Should never be NULL */ BUG_ON(!nskb); hif_dev->tx.tx_skb_cnt--; buf = tx_buf->buf; buf += tx_buf->offset; hdr = (__le16 *)buf; *hdr++ = cpu_to_le16(nskb->len); *hdr++ = cpu_to_le16(ATH_USB_TX_STREAM_MODE_TAG); buf += 4; memcpy(buf, nskb->data, nskb->len); tx_buf->len = nskb->len + 4; if (i < (tx_skb_cnt - 1)) tx_buf->offset += (((tx_buf->len - 1) / 4) + 1) * 4; if (i == (tx_skb_cnt - 1)) tx_buf->len += tx_buf->offset; __skb_queue_tail(&tx_buf->skb_queue, nskb); TX_STAT_INC(hif_dev, skb_queued); } usb_fill_bulk_urb(tx_buf->urb, hif_dev->udev, usb_sndbulkpipe(hif_dev->udev, USB_WLAN_TX_PIPE), tx_buf->buf, tx_buf->len, hif_usb_tx_cb, tx_buf); ret = usb_submit_urb(tx_buf->urb, GFP_ATOMIC); if (ret) { tx_buf->len = tx_buf->offset = 0; ath9k_skb_queue_complete(hif_dev, &tx_buf->skb_queue, false); __skb_queue_head_init(&tx_buf->skb_queue); list_move_tail(&tx_buf->list, &hif_dev->tx.tx_buf); hif_dev->tx.tx_buf_cnt++; } else { TX_STAT_INC(hif_dev, buf_queued); } return ret; } static int hif_usb_send_tx(struct hif_device_usb *hif_dev, struct sk_buff *skb) { struct ath9k_htc_tx_ctl *tx_ctl; unsigned long flags; int ret = 0; spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); if (hif_dev->tx.flags & HIF_USB_TX_STOP) { spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); return -ENODEV; } /* Check if the max queue count has been reached */ if (hif_dev->tx.tx_skb_cnt > MAX_TX_BUF_NUM) { spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); return -ENOMEM; } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); tx_ctl = HTC_SKB_CB(skb); /* Mgmt/Beacon frames don't use the TX buffer pool */ if ((tx_ctl->type == ATH9K_HTC_MGMT) || (tx_ctl->type == ATH9K_HTC_BEACON)) { ret = hif_usb_send_mgmt(hif_dev, skb); } spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); if ((tx_ctl->type == ATH9K_HTC_NORMAL) || (tx_ctl->type == ATH9K_HTC_AMPDU)) { __skb_queue_tail(&hif_dev->tx.tx_skb_queue, skb); hif_dev->tx.tx_skb_cnt++; } /* Check if AMPDUs have to be sent immediately */ if ((hif_dev->tx.tx_buf_cnt == MAX_TX_URB_NUM) && (hif_dev->tx.tx_skb_cnt < 2)) { __hif_usb_tx(hif_dev); } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); return ret; } static void 
hif_usb_start(void *hif_handle) { struct hif_device_usb *hif_dev = hif_handle; unsigned long flags; hif_dev->flags |= HIF_USB_START; spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); hif_dev->tx.flags &= ~HIF_USB_TX_STOP; spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); } static void hif_usb_stop(void *hif_handle) { struct hif_device_usb *hif_dev = hif_handle; struct tx_buf *tx_buf = NULL, *tx_buf_tmp = NULL; unsigned long flags; spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); ath9k_skb_queue_complete(hif_dev, &hif_dev->tx.tx_skb_queue, false); hif_dev->tx.tx_skb_cnt = 0; hif_dev->tx.flags |= HIF_USB_TX_STOP; spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); /* The pending URBs have to be canceled. */ spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); list_for_each_entry_safe(tx_buf, tx_buf_tmp, &hif_dev->tx.tx_pending, list) { usb_get_urb(tx_buf->urb); spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); usb_kill_urb(tx_buf->urb); list_del(&tx_buf->list); usb_free_urb(tx_buf->urb); kfree(tx_buf->buf); kfree(tx_buf); spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); usb_kill_anchored_urbs(&hif_dev->mgmt_submitted); } static int hif_usb_send(void *hif_handle, u8 pipe_id, struct sk_buff *skb) { struct hif_device_usb *hif_dev = hif_handle; int ret = 0; switch (pipe_id) { case USB_WLAN_TX_PIPE: ret = hif_usb_send_tx(hif_dev, skb); break; case USB_REG_OUT_PIPE: ret = hif_usb_send_regout(hif_dev, skb); break; default: dev_err(&hif_dev->udev->dev, "ath9k_htc: Invalid TX pipe: %d\n", pipe_id); ret = -EINVAL; break; } return ret; } static inline bool check_index(struct sk_buff *skb, u8 idx) { struct ath9k_htc_tx_ctl *tx_ctl; tx_ctl = HTC_SKB_CB(skb); if ((tx_ctl->type == ATH9K_HTC_AMPDU) && (tx_ctl->sta_idx == idx)) return true; return false; } static void hif_usb_sta_drain(void *hif_handle, u8 idx) { struct hif_device_usb *hif_dev = hif_handle; struct sk_buff *skb, *tmp; unsigned long flags; spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); skb_queue_walk_safe(&hif_dev->tx.tx_skb_queue, skb, tmp) { if (check_index(skb, idx)) { __skb_unlink(skb, &hif_dev->tx.tx_skb_queue); ath9k_htc_txcompletion_cb(hif_dev->htc_handle, skb, false); hif_dev->tx.tx_skb_cnt--; TX_STAT_INC(hif_dev, skb_failed); } } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); } static struct ath9k_htc_hif hif_usb = { .transport = ATH9K_HIF_USB, .name = "ath9k_hif_usb", .control_ul_pipe = USB_REG_OUT_PIPE, .control_dl_pipe = USB_REG_IN_PIPE, .start = hif_usb_start, .stop = hif_usb_stop, .sta_drain = hif_usb_sta_drain, .send = hif_usb_send, }; /* Need to free remain_skb allocated in ath9k_hif_usb_rx_stream * in case ath9k_hif_usb_rx_stream wasn't called next time to * process the buffer and subsequently free it. 
*/ static void ath9k_hif_usb_free_rx_remain_skb(struct hif_device_usb *hif_dev) { unsigned long flags; spin_lock_irqsave(&hif_dev->rx_lock, flags); if (hif_dev->remain_skb) { dev_kfree_skb_any(hif_dev->remain_skb); hif_dev->remain_skb = NULL; hif_dev->rx_remain_len = 0; RX_STAT_INC(hif_dev, skb_dropped); } spin_unlock_irqrestore(&hif_dev->rx_lock, flags); } static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev, struct sk_buff *skb) { struct sk_buff *nskb, *skb_pool[MAX_PKT_NUM_IN_TRANSFER]; int index = 0, i, len = skb->len; int rx_remain_len, rx_pkt_len; u16 pool_index = 0; u8 *ptr; spin_lock(&hif_dev->rx_lock); rx_remain_len = hif_dev->rx_remain_len; rx_pkt_len = hif_dev->rx_transfer_len; if (rx_remain_len != 0) { struct sk_buff *remain_skb = hif_dev->remain_skb; if (remain_skb) { ptr = (u8 *) remain_skb->data; index = rx_remain_len; rx_remain_len -= hif_dev->rx_pad_len; ptr += rx_pkt_len; memcpy(ptr, skb->data, rx_remain_len); rx_pkt_len += rx_remain_len; skb_put(remain_skb, rx_pkt_len); skb_pool[pool_index++] = remain_skb; hif_dev->remain_skb = NULL; hif_dev->rx_remain_len = 0; } else { index = rx_remain_len; } } spin_unlock(&hif_dev->rx_lock); while (index < len) { u16 pkt_len; u16 pkt_tag; u16 pad_len; int chk_idx; ptr = (u8 *) skb->data; pkt_len = get_unaligned_le16(ptr + index); pkt_tag = get_unaligned_le16(ptr + index + 2); /* It is supposed that if we have an invalid pkt_tag or * pkt_len then the whole input SKB is considered invalid * and dropped; the associated packets already in skb_pool * are dropped, too. */ if (pkt_tag != ATH_USB_RX_STREAM_MODE_TAG) { RX_STAT_INC(hif_dev, skb_dropped); goto invalid_pkt; } if (pkt_len > 2 * MAX_RX_BUF_SIZE) { dev_err(&hif_dev->udev->dev, "ath9k_htc: invalid pkt_len (%x)\n", pkt_len); RX_STAT_INC(hif_dev, skb_dropped); goto invalid_pkt; } pad_len = 4 - (pkt_len & 0x3); if (pad_len == 4) pad_len = 0; chk_idx = index; index = index + 4 + pkt_len + pad_len; if (index > MAX_RX_BUF_SIZE) { spin_lock(&hif_dev->rx_lock); nskb = __dev_alloc_skb(pkt_len + 32, GFP_ATOMIC); if (!nskb) { dev_err(&hif_dev->udev->dev, "ath9k_htc: RX memory allocation error\n"); spin_unlock(&hif_dev->rx_lock); goto err; } hif_dev->rx_remain_len = index - MAX_RX_BUF_SIZE; hif_dev->rx_transfer_len = MAX_RX_BUF_SIZE - chk_idx - 4; hif_dev->rx_pad_len = pad_len; skb_reserve(nskb, 32); RX_STAT_INC(hif_dev, skb_allocated); memcpy(nskb->data, &(skb->data[chk_idx+4]), hif_dev->rx_transfer_len); /* Record the buffer pointer */ hif_dev->remain_skb = nskb; spin_unlock(&hif_dev->rx_lock); } else { if (pool_index == MAX_PKT_NUM_IN_TRANSFER) { dev_err(&hif_dev->udev->dev, "ath9k_htc: over RX MAX_PKT_NUM\n"); goto err; } nskb = __dev_alloc_skb(pkt_len + 32, GFP_ATOMIC); if (!nskb) { dev_err(&hif_dev->udev->dev, "ath9k_htc: RX memory allocation error\n"); goto err; } skb_reserve(nskb, 32); RX_STAT_INC(hif_dev, skb_allocated); memcpy(nskb->data, &(skb->data[chk_idx+4]), pkt_len); skb_put(nskb, pkt_len); skb_pool[pool_index++] = nskb; } } err: for (i = 0; i < pool_index; i++) { RX_STAT_ADD(hif_dev, skb_completed_bytes, skb_pool[i]->len); ath9k_htc_rx_msg(hif_dev->htc_handle, skb_pool[i], skb_pool[i]->len, USB_WLAN_RX_PIPE); RX_STAT_INC(hif_dev, skb_completed); } return; invalid_pkt: for (i = 0; i < pool_index; i++) { dev_kfree_skb_any(skb_pool[i]); RX_STAT_INC(hif_dev, skb_dropped); } return; } static void ath9k_hif_usb_rx_cb(struct urb *urb) { struct rx_buf *rx_buf = urb->context; struct hif_device_usb *hif_dev = rx_buf->hif_dev; struct sk_buff *skb = rx_buf->skb; int ret; if 
(!skb) return; if (!hif_dev) goto free; switch (urb->status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: goto free; default: goto resubmit; } if (likely(urb->actual_length != 0)) { skb_put(skb, urb->actual_length); ath9k_hif_usb_rx_stream(hif_dev, skb); } resubmit: __skb_set_length(skb, 0); usb_anchor_urb(urb, &hif_dev->rx_submitted); ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret) { usb_unanchor_urb(urb); goto free; } return; free: kfree_skb(skb); kfree(rx_buf); } static void ath9k_hif_usb_reg_in_cb(struct urb *urb) { struct rx_buf *rx_buf = urb->context; struct hif_device_usb *hif_dev = rx_buf->hif_dev; struct sk_buff *skb = rx_buf->skb; int ret; if (!skb) return; if (!hif_dev) goto free_skb; switch (urb->status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: goto free_skb; default: __skb_set_length(skb, 0); goto resubmit; } if (likely(urb->actual_length != 0)) { skb_put(skb, urb->actual_length); /* * Process the command first. * skb is either freed here or passed to be * managed to another callback function. */ ath9k_htc_rx_msg(hif_dev->htc_handle, skb, skb->len, USB_REG_IN_PIPE); skb = alloc_skb(MAX_REG_IN_BUF_SIZE, GFP_ATOMIC); if (!skb) { dev_err(&hif_dev->udev->dev, "ath9k_htc: REG_IN memory allocation failure\n"); goto free_rx_buf; } rx_buf->skb = skb; usb_fill_int_urb(urb, hif_dev->udev, usb_rcvintpipe(hif_dev->udev, USB_REG_IN_PIPE), skb->data, MAX_REG_IN_BUF_SIZE, ath9k_hif_usb_reg_in_cb, rx_buf, 1); } resubmit: usb_anchor_urb(urb, &hif_dev->reg_in_submitted); ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret) { usb_unanchor_urb(urb); goto free_skb; } return; free_skb: kfree_skb(skb); free_rx_buf: kfree(rx_buf); urb->context = NULL; } static void ath9k_hif_usb_dealloc_tx_urbs(struct hif_device_usb *hif_dev) { struct tx_buf *tx_buf = NULL, *tx_buf_tmp = NULL; unsigned long flags; spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); list_for_each_entry_safe(tx_buf, tx_buf_tmp, &hif_dev->tx.tx_buf, list) { list_del(&tx_buf->list); usb_free_urb(tx_buf->urb); kfree(tx_buf->buf); kfree(tx_buf); } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); hif_dev->tx.flags |= HIF_USB_TX_FLUSH; spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); list_for_each_entry_safe(tx_buf, tx_buf_tmp, &hif_dev->tx.tx_pending, list) { usb_get_urb(tx_buf->urb); spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); usb_kill_urb(tx_buf->urb); list_del(&tx_buf->list); usb_free_urb(tx_buf->urb); kfree(tx_buf->buf); kfree(tx_buf); spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); usb_kill_anchored_urbs(&hif_dev->mgmt_submitted); } static int ath9k_hif_usb_alloc_tx_urbs(struct hif_device_usb *hif_dev) { struct tx_buf *tx_buf; int i; INIT_LIST_HEAD(&hif_dev->tx.tx_buf); INIT_LIST_HEAD(&hif_dev->tx.tx_pending); spin_lock_init(&hif_dev->tx.tx_lock); __skb_queue_head_init(&hif_dev->tx.tx_skb_queue); init_usb_anchor(&hif_dev->mgmt_submitted); for (i = 0; i < MAX_TX_URB_NUM; i++) { tx_buf = kzalloc(sizeof(*tx_buf), GFP_KERNEL); if (!tx_buf) goto err; tx_buf->buf = kzalloc(MAX_TX_BUF_SIZE, GFP_KERNEL); if (!tx_buf->buf) goto err; tx_buf->urb = usb_alloc_urb(0, GFP_KERNEL); if (!tx_buf->urb) goto err; tx_buf->hif_dev = hif_dev; __skb_queue_head_init(&tx_buf->skb_queue); list_add_tail(&tx_buf->list, &hif_dev->tx.tx_buf); } hif_dev->tx.tx_buf_cnt = MAX_TX_URB_NUM; return 0; err: if (tx_buf) { kfree(tx_buf->buf); 
kfree(tx_buf); } ath9k_hif_usb_dealloc_tx_urbs(hif_dev); return -ENOMEM; } static void ath9k_hif_usb_dealloc_rx_urbs(struct hif_device_usb *hif_dev) { usb_kill_anchored_urbs(&hif_dev->rx_submitted); ath9k_hif_usb_free_rx_remain_skb(hif_dev); } static int ath9k_hif_usb_alloc_rx_urbs(struct hif_device_usb *hif_dev) { struct rx_buf *rx_buf = NULL; struct sk_buff *skb = NULL; struct urb *urb = NULL; int i, ret; init_usb_anchor(&hif_dev->rx_submitted); spin_lock_init(&hif_dev->rx_lock); for (i = 0; i < MAX_RX_URB_NUM; i++) { rx_buf = kzalloc(sizeof(*rx_buf), GFP_KERNEL); if (!rx_buf) { ret = -ENOMEM; goto err_rxb; } /* Allocate URB */ urb = usb_alloc_urb(0, GFP_KERNEL); if (urb == NULL) { ret = -ENOMEM; goto err_urb; } /* Allocate buffer */ skb = alloc_skb(MAX_RX_BUF_SIZE, GFP_KERNEL); if (!skb) { ret = -ENOMEM; goto err_skb; } rx_buf->hif_dev = hif_dev; rx_buf->skb = skb; usb_fill_bulk_urb(urb, hif_dev->udev, usb_rcvbulkpipe(hif_dev->udev, USB_WLAN_RX_PIPE), skb->data, MAX_RX_BUF_SIZE, ath9k_hif_usb_rx_cb, rx_buf); /* Anchor URB */ usb_anchor_urb(urb, &hif_dev->rx_submitted); /* Submit URB */ ret = usb_submit_urb(urb, GFP_KERNEL); if (ret) { usb_unanchor_urb(urb); goto err_submit; } /* * Drop reference count. * This ensures that the URB is freed when killing them. */ usb_free_urb(urb); } return 0; err_submit: kfree_skb(skb); err_skb: usb_free_urb(urb); err_urb: kfree(rx_buf); err_rxb: ath9k_hif_usb_dealloc_rx_urbs(hif_dev); return ret; } static void ath9k_hif_usb_dealloc_reg_in_urbs(struct hif_device_usb *hif_dev) { usb_kill_anchored_urbs(&hif_dev->reg_in_submitted); } static int ath9k_hif_usb_alloc_reg_in_urbs(struct hif_device_usb *hif_dev) { struct rx_buf *rx_buf = NULL; struct sk_buff *skb = NULL; struct urb *urb = NULL; int i, ret; init_usb_anchor(&hif_dev->reg_in_submitted); for (i = 0; i < MAX_REG_IN_URB_NUM; i++) { rx_buf = kzalloc(sizeof(*rx_buf), GFP_KERNEL); if (!rx_buf) { ret = -ENOMEM; goto err_rxb; } /* Allocate URB */ urb = usb_alloc_urb(0, GFP_KERNEL); if (urb == NULL) { ret = -ENOMEM; goto err_urb; } /* Allocate buffer */ skb = alloc_skb(MAX_REG_IN_BUF_SIZE, GFP_KERNEL); if (!skb) { ret = -ENOMEM; goto err_skb; } rx_buf->hif_dev = hif_dev; rx_buf->skb = skb; usb_fill_int_urb(urb, hif_dev->udev, usb_rcvintpipe(hif_dev->udev, USB_REG_IN_PIPE), skb->data, MAX_REG_IN_BUF_SIZE, ath9k_hif_usb_reg_in_cb, rx_buf, 1); /* Anchor URB */ usb_anchor_urb(urb, &hif_dev->reg_in_submitted); /* Submit URB */ ret = usb_submit_urb(urb, GFP_KERNEL); if (ret) { usb_unanchor_urb(urb); goto err_submit; } /* * Drop reference count. * This ensures that the URB is freed when killing them. 
*/ usb_free_urb(urb); } return 0; err_submit: kfree_skb(skb); err_skb: usb_free_urb(urb); err_urb: kfree(rx_buf); err_rxb: ath9k_hif_usb_dealloc_reg_in_urbs(hif_dev); return ret; } static int ath9k_hif_usb_alloc_urbs(struct hif_device_usb *hif_dev) { /* Register Write */ init_usb_anchor(&hif_dev->regout_submitted); /* TX */ if (ath9k_hif_usb_alloc_tx_urbs(hif_dev) < 0) goto err; /* RX */ if (ath9k_hif_usb_alloc_rx_urbs(hif_dev) < 0) goto err_rx; /* Register Read */ if (ath9k_hif_usb_alloc_reg_in_urbs(hif_dev) < 0) goto err_reg; return 0; err_reg: ath9k_hif_usb_dealloc_rx_urbs(hif_dev); err_rx: ath9k_hif_usb_dealloc_tx_urbs(hif_dev); err: return -ENOMEM; } void ath9k_hif_usb_dealloc_urbs(struct hif_device_usb *hif_dev) { usb_kill_anchored_urbs(&hif_dev->regout_submitted); ath9k_hif_usb_dealloc_reg_in_urbs(hif_dev); ath9k_hif_usb_dealloc_tx_urbs(hif_dev); ath9k_hif_usb_dealloc_rx_urbs(hif_dev); } static int ath9k_hif_usb_download_fw(struct hif_device_usb *hif_dev) { int transfer, err; const void *data = hif_dev->fw_data; size_t len = hif_dev->fw_size; u32 addr = AR9271_FIRMWARE; u8 *buf = kzalloc(4096, GFP_KERNEL); u32 firm_offset; if (!buf) return -ENOMEM; while (len) { transfer = min_t(size_t, len, 4096); memcpy(buf, data, transfer); err = usb_control_msg(hif_dev->udev, usb_sndctrlpipe(hif_dev->udev, 0), FIRMWARE_DOWNLOAD, 0x40 | USB_DIR_OUT, addr >> 8, 0, buf, transfer, USB_MSG_TIMEOUT); if (err < 0) { kfree(buf); return err; } len -= transfer; data += transfer; addr += transfer; } kfree(buf); if (IS_AR7010_DEVICE(hif_dev->usb_device_id->driver_info)) firm_offset = AR7010_FIRMWARE_TEXT; else firm_offset = AR9271_FIRMWARE_TEXT; /* * Issue FW download complete command to firmware. */ err = usb_control_msg(hif_dev->udev, usb_sndctrlpipe(hif_dev->udev, 0), FIRMWARE_DOWNLOAD_COMP, 0x40 | USB_DIR_OUT, firm_offset >> 8, 0, NULL, 0, USB_MSG_TIMEOUT); if (err) return -EIO; dev_info(&hif_dev->udev->dev, "ath9k_htc: Transferred FW: %s, size: %ld\n", hif_dev->fw_name, (unsigned long) hif_dev->fw_size); return 0; } static int ath9k_hif_usb_dev_init(struct hif_device_usb *hif_dev) { int ret; ret = ath9k_hif_usb_download_fw(hif_dev); if (ret) { dev_err(&hif_dev->udev->dev, "ath9k_htc: Firmware - %s download failed\n", hif_dev->fw_name); return ret; } /* Alloc URBs */ ret = ath9k_hif_usb_alloc_urbs(hif_dev); if (ret) { dev_err(&hif_dev->udev->dev, "ath9k_htc: Unable to allocate URBs\n"); return ret; } return 0; } static void ath9k_hif_usb_dev_deinit(struct hif_device_usb *hif_dev) { ath9k_hif_usb_dealloc_urbs(hif_dev); } /* * If initialization fails or the FW cannot be retrieved, * detach the device. 
*/ static void ath9k_hif_usb_firmware_fail(struct hif_device_usb *hif_dev) { struct device *dev = &hif_dev->udev->dev; struct device *parent = dev->parent; complete_all(&hif_dev->fw_done); if (parent) device_lock(parent); device_release_driver(dev); if (parent) device_unlock(parent); } static void ath9k_hif_usb_firmware_cb(const struct firmware *fw, void *context); /* taken from iwlwifi */ static int ath9k_hif_request_firmware(struct hif_device_usb *hif_dev, bool first) { char index[8], *chip; int ret; if (first) { if (htc_use_dev_fw) { hif_dev->fw_minor_index = FIRMWARE_MINOR_IDX_MAX + 1; sprintf(index, "%s", "dev"); } else { hif_dev->fw_minor_index = FIRMWARE_MINOR_IDX_MAX; sprintf(index, "%d", hif_dev->fw_minor_index); } } else { hif_dev->fw_minor_index--; sprintf(index, "%d", hif_dev->fw_minor_index); } /* test for FW 1.3 */ if (MAJOR_VERSION_REQ == 1 && hif_dev->fw_minor_index == 3) { const char *filename; if (IS_AR7010_DEVICE(hif_dev->usb_device_id->driver_info)) filename = FIRMWARE_AR7010_1_1; else filename = FIRMWARE_AR9271; /* expected fw locations: * - htc_9271.fw (stable version 1.3, deprecated) */ snprintf(hif_dev->fw_name, sizeof(hif_dev->fw_name), "%s", filename); } else if (hif_dev->fw_minor_index < FIRMWARE_MINOR_IDX_MIN) { dev_err(&hif_dev->udev->dev, "no suitable firmware found!\n"); return -ENOENT; } else { if (IS_AR7010_DEVICE(hif_dev->usb_device_id->driver_info)) chip = "7010"; else chip = "9271"; /* expected fw locations: * - ath9k_htc/htc_9271-1.dev.0.fw (development version) * - ath9k_htc/htc_9271-1.4.0.fw (stable version) */ snprintf(hif_dev->fw_name, sizeof(hif_dev->fw_name), "%s/htc_%s-%d.%s.0.fw", HTC_FW_PATH, chip, MAJOR_VERSION_REQ, index); } ret = request_firmware_nowait(THIS_MODULE, true, hif_dev->fw_name, &hif_dev->udev->dev, GFP_KERNEL, hif_dev, ath9k_hif_usb_firmware_cb); if (ret) { dev_err(&hif_dev->udev->dev, "ath9k_htc: Async request for firmware %s failed\n", hif_dev->fw_name); return ret; } dev_info(&hif_dev->udev->dev, "ath9k_htc: Firmware %s requested\n", hif_dev->fw_name); return ret; } static void ath9k_hif_usb_firmware_cb(const struct firmware *fw, void *context) { struct hif_device_usb *hif_dev = context; int ret; if (!fw) { ret = ath9k_hif_request_firmware(hif_dev, false); if (!ret) return; dev_err(&hif_dev->udev->dev, "ath9k_htc: Failed to get firmware %s\n", hif_dev->fw_name); goto err_fw; } hif_dev->htc_handle = ath9k_htc_hw_alloc(hif_dev, &hif_usb, &hif_dev->udev->dev); if (hif_dev->htc_handle == NULL) goto err_dev_alloc; hif_dev->fw_data = fw->data; hif_dev->fw_size = fw->size; /* Proceed with initialization */ ret = ath9k_hif_usb_dev_init(hif_dev); if (ret) goto err_dev_init; ret = ath9k_htc_hw_init(hif_dev->htc_handle, &hif_dev->interface->dev, hif_dev->usb_device_id->idProduct, hif_dev->udev->product, hif_dev->usb_device_id->driver_info); if (ret) { ret = -EINVAL; goto err_htc_hw_init; } release_firmware(fw); hif_dev->flags |= HIF_USB_READY; complete_all(&hif_dev->fw_done); return; err_htc_hw_init: ath9k_hif_usb_dev_deinit(hif_dev); err_dev_init: ath9k_htc_hw_free(hif_dev->htc_handle); err_dev_alloc: release_firmware(fw); err_fw: ath9k_hif_usb_firmware_fail(hif_dev); } /* * An exact copy of the function from zd1211rw. 
*/ static int send_eject_command(struct usb_interface *interface) { struct usb_device *udev = interface_to_usbdev(interface); struct usb_host_interface *iface_desc = interface->cur_altsetting; struct usb_endpoint_descriptor *endpoint; unsigned char *cmd; u8 bulk_out_ep; int r; if (iface_desc->desc.bNumEndpoints < 2) return -ENODEV; /* Find bulk out endpoint */ for (r = 1; r >= 0; r--) { endpoint = &iface_desc->endpoint[r].desc; if (usb_endpoint_dir_out(endpoint) && usb_endpoint_xfer_bulk(endpoint)) { bulk_out_ep = endpoint->bEndpointAddress; break; } } if (r == -1) { dev_err(&udev->dev, "ath9k_htc: Could not find bulk out endpoint\n"); return -ENODEV; } cmd = kzalloc(31, GFP_KERNEL); if (cmd == NULL) return -ENODEV; /* USB bulk command block */ cmd[0] = 0x55; /* bulk command signature */ cmd[1] = 0x53; /* bulk command signature */ cmd[2] = 0x42; /* bulk command signature */ cmd[3] = 0x43; /* bulk command signature */ cmd[14] = 6; /* command length */ cmd[15] = 0x1b; /* SCSI command: START STOP UNIT */ cmd[19] = 0x2; /* eject disc */ dev_info(&udev->dev, "Ejecting storage device...\n"); r = usb_bulk_msg(udev, usb_sndbulkpipe(udev, bulk_out_ep), cmd, 31, NULL, 2 * USB_MSG_TIMEOUT); kfree(cmd); if (r) return r; /* At this point, the device disconnects and reconnects with the real * ID numbers. */ usb_set_intfdata(interface, NULL); return 0; } static int ath9k_hif_usb_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_endpoint_descriptor *bulk_in, *bulk_out, *int_in, *int_out; struct usb_device *udev = interface_to_usbdev(interface); struct usb_host_interface *alt; struct hif_device_usb *hif_dev; int ret = 0; /* Verify the expected endpoints are present */ alt = interface->cur_altsetting; if (usb_find_common_endpoints(alt, &bulk_in, &bulk_out, &int_in, &int_out) < 0 || usb_endpoint_num(bulk_in) != USB_WLAN_RX_PIPE || usb_endpoint_num(bulk_out) != USB_WLAN_TX_PIPE || usb_endpoint_num(int_in) != USB_REG_IN_PIPE || usb_endpoint_num(int_out) != USB_REG_OUT_PIPE) { dev_err(&udev->dev, "ath9k_htc: Device endpoint numbers are not the expected ones\n"); return -ENODEV; } if (id->driver_info == STORAGE_DEVICE) return send_eject_command(interface); hif_dev = kzalloc(sizeof(struct hif_device_usb), GFP_KERNEL); if (!hif_dev) { ret = -ENOMEM; goto err_alloc; } usb_get_dev(udev); hif_dev->udev = udev; hif_dev->interface = interface; hif_dev->usb_device_id = id; #ifdef CONFIG_PM udev->reset_resume = 1; #endif usb_set_intfdata(interface, hif_dev); init_completion(&hif_dev->fw_done); ret = ath9k_hif_request_firmware(hif_dev, true); if (ret) goto err_fw_req; return ret; err_fw_req: usb_set_intfdata(interface, NULL); kfree(hif_dev); usb_put_dev(udev); err_alloc: return ret; } static void ath9k_hif_usb_reboot(struct usb_device *udev) { u32 reboot_cmd = 0xffffffff; void *buf; int ret; buf = kmemdup(&reboot_cmd, 4, GFP_KERNEL); if (!buf) return; ret = usb_interrupt_msg(udev, usb_sndintpipe(udev, USB_REG_OUT_PIPE), buf, 4, NULL, USB_MSG_TIMEOUT); if (ret) dev_err(&udev->dev, "ath9k_htc: USB reboot failed\n"); kfree(buf); } static void ath9k_hif_usb_disconnect(struct usb_interface *interface) { struct usb_device *udev = interface_to_usbdev(interface); struct hif_device_usb *hif_dev = usb_get_intfdata(interface); bool unplugged = udev->state == USB_STATE_NOTATTACHED; if (!hif_dev) return; wait_for_completion(&hif_dev->fw_done); if (hif_dev->flags & HIF_USB_READY) { ath9k_htc_hw_deinit(hif_dev->htc_handle, unplugged); ath9k_htc_hw_free(hif_dev->htc_handle); } usb_set_intfdata(interface, 
NULL); /* If firmware was loaded we should drop it * go back to first stage bootloader. */ if (!unplugged && (hif_dev->flags & HIF_USB_READY)) ath9k_hif_usb_reboot(udev); kfree(hif_dev); dev_info(&udev->dev, "ath9k_htc: USB layer deinitialized\n"); usb_put_dev(udev); } #ifdef CONFIG_PM static int ath9k_hif_usb_suspend(struct usb_interface *interface, pm_message_t message) { struct hif_device_usb *hif_dev = usb_get_intfdata(interface); /* * The device has to be set to FULLSLEEP mode in case no * interface is up. */ if (!(hif_dev->flags & HIF_USB_START)) ath9k_htc_suspend(hif_dev->htc_handle); wait_for_completion(&hif_dev->fw_done); if (hif_dev->flags & HIF_USB_READY) ath9k_hif_usb_dealloc_urbs(hif_dev); return 0; } static int ath9k_hif_usb_resume(struct usb_interface *interface) { struct hif_device_usb *hif_dev = usb_get_intfdata(interface); struct htc_target *htc_handle = hif_dev->htc_handle; const struct firmware *fw; int ret; ret = ath9k_hif_usb_alloc_urbs(hif_dev); if (ret) return ret; if (!(hif_dev->flags & HIF_USB_READY)) { ret = -EIO; goto fail_resume; } /* request cached firmware during suspend/resume cycle */ ret = request_firmware(&fw, hif_dev->fw_name, &hif_dev->udev->dev); if (ret) goto fail_resume; hif_dev->fw_data = fw->data; hif_dev->fw_size = fw->size; ret = ath9k_hif_usb_download_fw(hif_dev); release_firmware(fw); if (ret) goto fail_resume; mdelay(100); ret = ath9k_htc_resume(htc_handle); if (ret) goto fail_resume; return 0; fail_resume: ath9k_hif_usb_dealloc_urbs(hif_dev); return ret; } #endif static struct usb_driver ath9k_hif_usb_driver = { .name = KBUILD_MODNAME, .probe = ath9k_hif_usb_probe, .disconnect = ath9k_hif_usb_disconnect, #ifdef CONFIG_PM .suspend = ath9k_hif_usb_suspend, .resume = ath9k_hif_usb_resume, .reset_resume = ath9k_hif_usb_resume, #endif .id_table = ath9k_hif_usb_ids, .soft_unbind = 1, .disable_hub_initiated_lpm = 1, }; int ath9k_hif_usb_init(void) { return usb_register(&ath9k_hif_usb_driver); } void ath9k_hif_usb_exit(void) { usb_deregister(&ath9k_hif_usb_driver); }
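Both __hif_usb_tx() and ath9k_hif_usb_rx_stream() above agree on the same stream-mode framing: every aggregated packet carries a 4-byte header (le16 payload length, le16 ATH_USB_TX_STREAM_MODE_TAG) and is padded out to the next 4-byte boundary. A small illustrative helper, not part of the driver, that captures the slot-length math both sides use:

/* Space one packet occupies inside a stream-mode USB transfer. */
static inline unsigned int htc_stream_slot_len(unsigned int pkt_len)
{
	unsigned int pad = (4 - (pkt_len & 3)) & 3;	/* pad payload to 4 bytes */

	return 4 /* le16 len + le16 tag */ + pkt_len + pad;
}

This matches the RX parser's index advance (4 + pkt_len + pad_len) and the TX path's round-up of tx_buf->len to the next 4-byte offset before the following packet is appended.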
// SPDX-License-Identifier: GPL-2.0-only /* * Ram backed block device driver. * * Copyright (C) 2007 Nick Piggin * Copyright (C) 2007 Novell Inc. * * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright * of their respective owners. */ #include <linux/init.h> #include <linux/initrd.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/major.h> #include <linux/blkdev.h> #include <linux/bio.h> #include <linux/highmem.h> #include <linux/mutex.h> #include <linux/pagemap.h> #include <linux/xarray.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/backing-dev.h> #include <linux/debugfs.h> #include <linux/uaccess.h> /* * Each block ramdisk device has a xarray brd_pages of pages that stores * the pages containing the block device's contents. */ struct brd_device { int brd_number; struct gendisk *brd_disk; struct list_head brd_list; /* * Backing store of pages. This is the contents of the block device. */ struct xarray brd_pages; u64 brd_nr_pages; }; /* * Look up and return a brd's page for a given sector. */ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) { return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT); } /* * Insert a new page for a given sector, if one does not already exist. */ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector, blk_opf_t opf) __releases(rcu) __acquires(rcu) { gfp_t gfp = (opf & REQ_NOWAIT) ?
GFP_NOWAIT : GFP_NOIO; struct page *page, *ret; rcu_read_unlock(); page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); if (!page) { rcu_read_lock(); return ERR_PTR(-ENOMEM); } xa_lock(&brd->brd_pages); ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL, page, gfp); rcu_read_lock(); if (ret) { xa_unlock(&brd->brd_pages); __free_page(page); if (xa_is_err(ret)) return ERR_PTR(xa_err(ret)); return ret; } brd->brd_nr_pages++; xa_unlock(&brd->brd_pages); return page; } /* * Free all backing store pages and xarray. This must only be called when * there are no other users of the device. */ static void brd_free_pages(struct brd_device *brd) { struct page *page; pgoff_t idx; xa_for_each(&brd->brd_pages, idx, page) { __free_page(page); cond_resched(); } xa_destroy(&brd->brd_pages); } /* * Process a single segment. The segment is capped to not cross page boundaries * in both the bio and the brd backing memory. */ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio) { struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter); sector_t sector = bio->bi_iter.bi_sector; u32 offset = (sector & (PAGE_SECTORS - 1)) << SECTOR_SHIFT; blk_opf_t opf = bio->bi_opf; struct page *page; void *kaddr; bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); rcu_read_lock(); page = brd_lookup_page(brd, sector); if (!page && op_is_write(opf)) { page = brd_insert_page(brd, sector, opf); if (IS_ERR(page)) goto out_error; } kaddr = bvec_kmap_local(&bv); if (op_is_write(opf)) { memcpy_to_page(page, offset, kaddr, bv.bv_len); } else { if (page) memcpy_from_page(kaddr, page, offset, bv.bv_len); else memset(kaddr, 0, bv.bv_len); } kunmap_local(kaddr); rcu_read_unlock(); bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len); return true; out_error: rcu_read_unlock(); if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT)) bio_wouldblock_error(bio); else bio_io_error(bio); return false; } static void brd_free_one_page(struct rcu_head *head) { struct page *page = container_of(head, struct page, rcu_head); __free_page(page); } static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) { sector_t aligned_sector = round_up(sector, PAGE_SECTORS); sector_t aligned_end = round_down( sector + (size >> SECTOR_SHIFT), PAGE_SECTORS); struct page *page; if (aligned_end <= aligned_sector) return; xa_lock(&brd->brd_pages); while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) { page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT); if (page) { call_rcu(&page->rcu_head, brd_free_one_page); brd->brd_nr_pages--; } aligned_sector += PAGE_SECTORS; } xa_unlock(&brd->brd_pages); } static void brd_submit_bio(struct bio *bio) { struct brd_device *brd = bio->bi_bdev->bd_disk->private_data; if (unlikely(op_is_discard(bio->bi_opf))) { brd_do_discard(brd, bio->bi_iter.bi_sector, bio->bi_iter.bi_size); bio_endio(bio); return; } do { if (!brd_rw_bvec(brd, bio)) return; } while (bio->bi_iter.bi_size); bio_endio(bio); } static const struct block_device_operations brd_fops = { .owner = THIS_MODULE, .submit_bio = brd_submit_bio, }; /* * And now the modules code and kernel interface. 
*/ static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT; module_param(rd_nr, int, 0444); MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE; module_param(rd_size, ulong, 0444); MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); static int max_part = 1; module_param(max_part, int, 0444); MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices"); MODULE_DESCRIPTION("Ram backed block device driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); MODULE_ALIAS("rd"); #ifndef MODULE /* Legacy boot options - nonmodular */ static int __init ramdisk_size(char *str) { rd_size = simple_strtol(str, NULL, 0); return 1; } __setup("ramdisk_size=", ramdisk_size); #endif /* * The device scheme is derived from loop.c. Keep them in synch where possible * (should share code eventually). */ static LIST_HEAD(brd_devices); static DEFINE_MUTEX(brd_devices_mutex); static struct dentry *brd_debugfs_dir; static struct brd_device *brd_find_or_alloc_device(int i) { struct brd_device *brd; mutex_lock(&brd_devices_mutex); list_for_each_entry(brd, &brd_devices, brd_list) { if (brd->brd_number == i) { mutex_unlock(&brd_devices_mutex); return ERR_PTR(-EEXIST); } } brd = kzalloc(sizeof(*brd), GFP_KERNEL); if (!brd) { mutex_unlock(&brd_devices_mutex); return ERR_PTR(-ENOMEM); } brd->brd_number = i; list_add_tail(&brd->brd_list, &brd_devices); mutex_unlock(&brd_devices_mutex); return brd; } static void brd_free_device(struct brd_device *brd) { mutex_lock(&brd_devices_mutex); list_del(&brd->brd_list); mutex_unlock(&brd_devices_mutex); kfree(brd); } static int brd_alloc(int i) { struct brd_device *brd; struct gendisk *disk; char buf[DISK_NAME_LEN]; int err = -ENOMEM; struct queue_limits lim = { /* * This is so fdisk will align partitions on 4k, because of * direct_access API needing 4k alignment, returning a PFN * (This is only a problem on very small devices <= 4M, * otherwise fdisk will align on 1M. Regardless this call * is harmless) */ .physical_block_size = PAGE_SIZE, .max_hw_discard_sectors = UINT_MAX, .max_discard_segments = 1, .discard_granularity = PAGE_SIZE, .features = BLK_FEAT_SYNCHRONOUS | BLK_FEAT_NOWAIT, }; brd = brd_find_or_alloc_device(i); if (IS_ERR(brd)) return PTR_ERR(brd); xa_init(&brd->brd_pages); snprintf(buf, DISK_NAME_LEN, "ram%d", i); if (!IS_ERR_OR_NULL(brd_debugfs_dir)) debugfs_create_u64(buf, 0444, brd_debugfs_dir, &brd->brd_nr_pages); disk = brd->brd_disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_free_dev; } disk->major = RAMDISK_MAJOR; disk->first_minor = i * max_part; disk->minors = max_part; disk->fops = &brd_fops; disk->private_data = brd; strscpy(disk->disk_name, buf, DISK_NAME_LEN); set_capacity(disk, rd_size * 2); err = add_disk(disk); if (err) goto out_cleanup_disk; return 0; out_cleanup_disk: put_disk(disk); out_free_dev: brd_free_device(brd); return err; } static void brd_probe(dev_t dev) { brd_alloc(MINOR(dev) / max_part); } static void brd_cleanup(void) { struct brd_device *brd, *next; debugfs_remove_recursive(brd_debugfs_dir); list_for_each_entry_safe(brd, next, &brd_devices, brd_list) { del_gendisk(brd->brd_disk); put_disk(brd->brd_disk); brd_free_pages(brd); brd_free_device(brd); } } static inline void brd_check_and_reset_par(void) { if (unlikely(!max_part)) max_part = 1; /* * make sure 'max_part' can be divided exactly by (1U << MINORBITS), * otherwise, it is possiable to get same dev_t when adding partitions. 
*/ if ((1U << MINORBITS) % max_part != 0) max_part = 1UL << fls(max_part); if (max_part > DISK_MAX_PARTS) { pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n", DISK_MAX_PARTS, DISK_MAX_PARTS); max_part = DISK_MAX_PARTS; } } static int __init brd_init(void) { int err, i; /* * brd module now has a feature to instantiate underlying device * structure on-demand, provided that there is an access dev node. * * (1) if rd_nr is specified, create that many upfront. else * it defaults to CONFIG_BLK_DEV_RAM_COUNT * (2) User can further extend brd devices by create dev node themselves * and have kernel automatically instantiate actual device * on-demand. Example: * mknod /path/devnod_name b 1 X # 1 is the rd major * fdisk -l /path/devnod_name * If (X / max_part) was not already created it will be created * dynamically. */ brd_check_and_reset_par(); brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL); if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) { err = -EIO; goto out_free; } for (i = 0; i < rd_nr; i++) brd_alloc(i); pr_info("brd: module loaded\n"); return 0; out_free: brd_cleanup(); pr_info("brd: module NOT loaded !!!\n"); return err; } static void __exit brd_exit(void) { unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); brd_cleanup(); pr_info("brd: module unloaded\n"); } module_init(brd_init); module_exit(brd_exit);
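brd keys its xarray by page-sized chunks of the disk: brd_lookup_page() shifts the sector by PAGE_SECTORS_SHIFT, and brd_rw_bvec() derives the byte offset inside that page from the low sector bits. A short sketch of that mapping (helper names are illustrative; the constants are assumed to come from <linux/blkdev.h>):

#include <linux/blkdev.h>	/* SECTOR_SHIFT, PAGE_SECTORS, PAGE_SECTORS_SHIFT */

/* Which backing page in brd_pages holds this sector. */
static inline pgoff_t brd_page_index(sector_t sector)
{
	return sector >> PAGE_SECTORS_SHIFT;
}

/* Byte offset of the sector's data within that page. */
static inline unsigned int brd_page_offset(sector_t sector)
{
	return (sector & (PAGE_SECTORS - 1)) << SECTOR_SHIFT;
}

This is why brd_rw_bvec() caps each segment at PAGE_SIZE minus that offset: a single bio_vec never spans two backing pages.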
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_FLOW_DISSECTOR_H #define _NET_FLOW_DISSECTOR_H #include <linux/types.h> #include <linux/in6.h> #include <linux/siphash.h> #include <linux/string.h> #include <uapi/linux/if_ether.h> #include <uapi/linux/pkt_cls.h> struct bpf_prog; struct net; struct sk_buff; /** * struct flow_dissector_key_control: * @thoff: Transport header offset * @addr_type: Type of key. One of FLOW_DISSECTOR_KEY_* * @flags: Key flags. * Any of FLOW_DIS_(IS_FRAGMENT|FIRST_FRAG|ENCAPSULATION|F_*) */ struct flow_dissector_key_control { u16 thoff; u16 addr_type; u32 flags; }; /* The control flags are kept in sync with TCA_FLOWER_KEY_FLAGS_*, as those * flags are exposed to userspace in some error paths, ie. unsupported flags. */ enum flow_dissector_ctrl_flags { FLOW_DIS_IS_FRAGMENT = TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_FIRST_FRAG = TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, FLOW_DIS_F_TUNNEL_CSUM = TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM, FLOW_DIS_F_TUNNEL_DONT_FRAGMENT = TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT, FLOW_DIS_F_TUNNEL_OAM = TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM, FLOW_DIS_F_TUNNEL_CRIT_OPT = TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT, /* These flags are internal to the kernel */ FLOW_DIS_ENCAPSULATION = (TCA_FLOWER_KEY_FLAGS_MAX << 1), }; enum flow_dissect_ret { FLOW_DISSECT_RET_OUT_GOOD, FLOW_DISSECT_RET_OUT_BAD, FLOW_DISSECT_RET_PROTO_AGAIN, FLOW_DISSECT_RET_IPPROTO_AGAIN, FLOW_DISSECT_RET_CONTINUE, }; /** * struct flow_dissector_key_basic: * @n_proto: Network header protocol (eg. IPv4/IPv6) * @ip_proto: Transport header protocol (eg.
TCP/UDP) * @padding: Unused */ struct flow_dissector_key_basic { __be16 n_proto; u8 ip_proto; u8 padding; }; struct flow_dissector_key_tags { u32 flow_label; }; struct flow_dissector_key_vlan { union { struct { u16 vlan_id:12, vlan_dei:1, vlan_priority:3; }; __be16 vlan_tci; }; __be16 vlan_tpid; __be16 vlan_eth_type; u16 padding; }; struct flow_dissector_mpls_lse { u32 mpls_ttl:8, mpls_bos:1, mpls_tc:3, mpls_label:20; }; #define FLOW_DIS_MPLS_MAX 7 struct flow_dissector_key_mpls { struct flow_dissector_mpls_lse ls[FLOW_DIS_MPLS_MAX]; /* Label Stack */ u8 used_lses; /* One bit set for each Label Stack Entry in use */ }; static inline void dissector_set_mpls_lse(struct flow_dissector_key_mpls *mpls, int lse_index) { mpls->used_lses |= 1 << lse_index; } #define FLOW_DIS_TUN_OPTS_MAX 255 /** * struct flow_dissector_key_enc_opts: * @data: tunnel option data * @len: length of tunnel option data * @dst_opt_type: tunnel option type */ struct flow_dissector_key_enc_opts { u8 data[FLOW_DIS_TUN_OPTS_MAX]; /* Using IP_TUNNEL_OPTS_MAX is desired * here but seems difficult to #include */ u8 len; u32 dst_opt_type; }; struct flow_dissector_key_keyid { __be32 keyid; }; /** * struct flow_dissector_key_ipv4_addrs: * @src: source ip address * @dst: destination ip address */ struct flow_dissector_key_ipv4_addrs { /* (src,dst) must be grouped, in the same way than in IP header */ __be32 src; __be32 dst; }; /** * struct flow_dissector_key_ipv6_addrs: * @src: source ip address * @dst: destination ip address */ struct flow_dissector_key_ipv6_addrs { /* (src,dst) must be grouped, in the same way than in IP header */ struct in6_addr src; struct in6_addr dst; }; /** * struct flow_dissector_key_tipc: * @key: source node address combined with selector */ struct flow_dissector_key_tipc { __be32 key; }; /** * struct flow_dissector_key_addrs: * @v4addrs: IPv4 addresses * @v6addrs: IPv6 addresses * @tipckey: TIPC key */ struct flow_dissector_key_addrs { union { struct flow_dissector_key_ipv4_addrs v4addrs; struct flow_dissector_key_ipv6_addrs v6addrs; struct flow_dissector_key_tipc tipckey; }; }; /** * struct flow_dissector_key_arp: * @sip: Sender IP address * @tip: Target IP address * @op: Operation * @sha: Sender hardware address * @tha: Target hardware address */ struct flow_dissector_key_arp { __u32 sip; __u32 tip; __u8 op; unsigned char sha[ETH_ALEN]; unsigned char tha[ETH_ALEN]; }; /** * struct flow_dissector_key_ports: * @ports: port numbers of Transport header * @src: source port number * @dst: destination port number */ struct flow_dissector_key_ports { union { __be32 ports; struct { __be16 src; __be16 dst; }; }; }; /** * struct flow_dissector_key_ports_range * @tp: port number from packet * @tp_min: min port number in range * @tp_max: max port number in range */ struct flow_dissector_key_ports_range { union { struct flow_dissector_key_ports tp; struct { struct flow_dissector_key_ports tp_min; struct flow_dissector_key_ports tp_max; }; }; }; /** * struct flow_dissector_key_icmp: * @type: ICMP type * @code: ICMP code * @id: Session identifier */ struct flow_dissector_key_icmp { struct { u8 type; u8 code; }; u16 id; }; /** * struct flow_dissector_key_eth_addrs: * @src: source Ethernet address * @dst: destination Ethernet address */ struct flow_dissector_key_eth_addrs { /* (dst,src) must be grouped, in the same way than in ETH header */ unsigned char dst[ETH_ALEN]; unsigned char src[ETH_ALEN]; }; /** * struct flow_dissector_key_tcp: * @flags: flags */ struct flow_dissector_key_tcp { __be16 flags; }; /** * struct 
flow_dissector_key_ip: * @tos: tos * @ttl: ttl */ struct flow_dissector_key_ip { __u8 tos; __u8 ttl; }; /** * struct flow_dissector_key_meta: * @ingress_ifindex: ingress ifindex * @ingress_iftype: ingress interface type * @l2_miss: packet did not match an L2 entry during forwarding */ struct flow_dissector_key_meta { int ingress_ifindex; u16 ingress_iftype; u8 l2_miss; }; /** * struct flow_dissector_key_ct: * @ct_state: conntrack state after converting with map * @ct_mark: conttrack mark * @ct_zone: conntrack zone * @ct_labels: conntrack labels */ struct flow_dissector_key_ct { u16 ct_state; u16 ct_zone; u32 ct_mark; u32 ct_labels[4]; }; /** * struct flow_dissector_key_hash: * @hash: hash value */ struct flow_dissector_key_hash { u32 hash; }; /** * struct flow_dissector_key_num_of_vlans: * @num_of_vlans: num_of_vlans value */ struct flow_dissector_key_num_of_vlans { u8 num_of_vlans; }; /** * struct flow_dissector_key_pppoe: * @session_id: pppoe session id * @ppp_proto: ppp protocol * @type: pppoe eth type */ struct flow_dissector_key_pppoe { __be16 session_id; __be16 ppp_proto; __be16 type; }; /** * struct flow_dissector_key_l2tpv3: * @session_id: identifier for a l2tp session */ struct flow_dissector_key_l2tpv3 { __be32 session_id; }; /** * struct flow_dissector_key_ipsec: * @spi: identifier for a ipsec connection */ struct flow_dissector_key_ipsec { __be32 spi; }; /** * struct flow_dissector_key_cfm * @mdl_ver: maintenance domain level (mdl) and cfm protocol version * @opcode: code specifying a type of cfm protocol packet * * See 802.1ag, ITU-T G.8013/Y.1731 * 1 2 * |7 6 5 4 3 2 1 0|7 6 5 4 3 2 1 0| * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | mdl | version | opcode | * +-----+---------+-+-+-+-+-+-+-+-+ */ struct flow_dissector_key_cfm { u8 mdl_ver; u8 opcode; }; #define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5) #define FLOW_DIS_CFM_MDL_MAX 7 enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_PORTS_RANGE, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */ FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */ FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_ENC_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */ FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */ FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_vlan */ FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct 
flow_dissector_key_enc_opts */ FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */ FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */ FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */ FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */ FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */ FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */ FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */ FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */ FLOW_DISSECTOR_KEY_MAX, }; #define FLOW_DISSECTOR_F_PARSE_1ST_FRAG BIT(0) #define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(1) #define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(2) #define FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP BIT(3) struct flow_dissector_key { enum flow_dissector_key_id key_id; size_t offset; /* offset of struct flow_dissector_key_* in target the struct */ }; struct flow_dissector { unsigned long long used_keys; /* each bit represents presence of one key id */ unsigned short int offset[FLOW_DISSECTOR_KEY_MAX]; }; struct flow_keys_basic { struct flow_dissector_key_control control; struct flow_dissector_key_basic basic; }; struct flow_keys { struct flow_dissector_key_control control; #define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT); struct flow_dissector_key_tags tags; struct flow_dissector_key_vlan vlan; struct flow_dissector_key_vlan cvlan; struct flow_dissector_key_keyid keyid; struct flow_dissector_key_ports ports; struct flow_dissector_key_icmp icmp; /* 'addrs' must be the last member */ struct flow_dissector_key_addrs addrs; }; #define FLOW_KEYS_HASH_OFFSET \ offsetof(struct flow_keys, FLOW_KEYS_HASH_START_FIELD) __be32 flow_get_u32_src(const struct flow_keys *flow); __be32 flow_get_u32_dst(const struct flow_keys *flow); extern struct flow_dissector flow_keys_dissector; extern struct flow_dissector flow_keys_basic_dissector; /* struct flow_keys_digest: * * This structure is used to hold a digest of the full flow keys. This is a * larger "hash" of a flow to allow definitively matching specific flows where * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so * that it can be used in CB of skb (see sch_choke for an example). 
*/ #define FLOW_KEYS_DIGEST_LEN 16 struct flow_keys_digest { u8 data[FLOW_KEYS_DIGEST_LEN]; }; void make_flow_keys_digest(struct flow_keys_digest *digest, const struct flow_keys *flow); static inline bool flow_keys_have_l4(const struct flow_keys *keys) { return (keys->ports.ports || keys->tags.flow_label); } u32 flow_hash_from_keys(struct flow_keys *keys); u32 flow_hash_from_keys_seed(struct flow_keys *keys, const siphash_key_t *keyval); void skb_flow_get_icmp_tci(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, const void *data, int thoff, int hlen); static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector, enum flow_dissector_key_id key_id) { return flow_dissector->used_keys & (1ULL << key_id); } static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissector, enum flow_dissector_key_id key_id, void *target_container) { return ((char *)target_container) + flow_dissector->offset[key_id]; } struct bpf_flow_dissector { struct bpf_flow_keys *flow_keys; const struct sk_buff *skb; const void *data; const void *data_end; }; static inline void flow_dissector_init_keys(struct flow_dissector_key_control *key_control, struct flow_dissector_key_basic *key_basic) { memset(key_control, 0, sizeof(*key_control)); memset(key_basic, 0, sizeof(*key_basic)); } #ifdef CONFIG_BPF_SYSCALL int flow_dissector_bpf_prog_attach_check(struct net *net, struct bpf_prog *prog); #endif /* CONFIG_BPF_SYSCALL */ #endif
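The dissector_uses_key()/skb_flow_dissector_target() pair above is the central lookup pattern of this header: used_keys is a bitmap with one bit per enabled key id, and offset[] records where each key's struct lives inside the caller-supplied target container (such as struct flow_keys). The user-space sketch below mirrors that bitmap-plus-offset layout with simplified, hypothetical demo_* types; it is an illustration of the pattern only, not the kernel API.

/* Standalone illustration of the used_keys/offset lookup pattern.
 * All demo_* names are simplified stand-ins, not the kernel's types. */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

enum demo_key_id { DEMO_KEY_BASIC, DEMO_KEY_PORTS, DEMO_KEY_MAX };

struct demo_key_basic { unsigned short n_proto; unsigned char ip_proto; };
struct demo_key_ports { unsigned short src, dst; };

struct demo_keys {                       /* the "target container" */
        struct demo_key_basic basic;
        struct demo_key_ports ports;
};

struct demo_dissector {
        unsigned long long used_keys;            /* one bit per key id */
        unsigned short offset[DEMO_KEY_MAX];     /* where each key sits in the container */
};

static void demo_set_key(struct demo_dissector *d, enum demo_key_id id, size_t off)
{
        d->used_keys |= 1ULL << id;
        d->offset[id] = (unsigned short)off;
}

static int demo_uses_key(const struct demo_dissector *d, enum demo_key_id id)
{
        return !!(d->used_keys & (1ULL << id));
}

static void *demo_target(const struct demo_dissector *d, enum demo_key_id id,
                         void *container)
{
        /* same idea as skb_flow_dissector_target(): base pointer + offset */
        return (char *)container + d->offset[id];
}

int main(void)
{
        struct demo_dissector d = { 0 };
        struct demo_keys keys;

        memset(&keys, 0, sizeof(keys));
        demo_set_key(&d, DEMO_KEY_PORTS, offsetof(struct demo_keys, ports));

        if (demo_uses_key(&d, DEMO_KEY_PORTS)) {
                struct demo_key_ports *p = demo_target(&d, DEMO_KEY_PORTS, &keys);
                p->src = 12345;
                p->dst = 80;
        }
        printf("ports %u -> %u\n", (unsigned)keys.ports.src, (unsigned)keys.ports.dst);
        return 0;
}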
// SPDX-License-Identifier: GPL-2.0-only /* * OCB mode implementation * * Copyright: (c) 2014 Czech Technical University in Prague * (c) 2014 Volkswagen Group Research * Copyright (C) 2022 - 2024 Intel Corporation * Author: Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz> * Funded by: Volkswagen Group Research */ #include <linux/delay.h> #include <linux/if_ether.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <net/mac80211.h> #include <linux/unaligned.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" #define IEEE80211_OCB_HOUSEKEEPING_INTERVAL (60 * HZ) #define IEEE80211_OCB_PEER_INACTIVITY_LIMIT (240 * HZ) #define IEEE80211_OCB_MAX_STA_ENTRIES 128 /** * enum ocb_deferred_task_flags - mac80211 OCB deferred tasks * @OCB_WORK_HOUSEKEEPING: run the periodic OCB housekeeping tasks * * These flags are used in @wrkq_flags field of &struct ieee80211_if_ocb */ enum ocb_deferred_task_flags { OCB_WORK_HOUSEKEEPING, }; void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, const u8 *addr, u32 supp_rates) { struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_supported_band *sband; struct sta_info *sta; int band; /* XXX: Consider removing the least recently used entry and * allow new one to be added.
*/ if (local->num_sta >= IEEE80211_OCB_MAX_STA_ENTRIES) { net_info_ratelimited("%s: No room for a new OCB STA entry %pM\n", sdata->name, addr); return; } ocb_dbg(sdata, "Adding new OCB station %pM\n", addr); rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON_ONCE(!chanctx_conf)) { rcu_read_unlock(); return; } band = chanctx_conf->def.chan->band; rcu_read_unlock(); sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); if (!sta) return; /* Add only mandatory rates for now */ sband = local->hw.wiphy->bands[band]; sta->sta.deflink.supp_rates[band] = ieee80211_mandatory_rates(sband); spin_lock(&ifocb->incomplete_lock); list_add(&sta->list, &ifocb->incomplete_stations); spin_unlock(&ifocb->incomplete_lock); wiphy_work_queue(local->hw.wiphy, &sdata->work); } static struct sta_info *ieee80211_ocb_finish_sta(struct sta_info *sta) __acquires(RCU) { struct ieee80211_sub_if_data *sdata = sta->sdata; u8 addr[ETH_ALEN]; memcpy(addr, sta->sta.addr, ETH_ALEN); ocb_dbg(sdata, "Adding new IBSS station %pM (dev=%s)\n", addr, sdata->name); sta_info_move_state(sta, IEEE80211_STA_AUTH); sta_info_move_state(sta, IEEE80211_STA_ASSOC); sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); rate_control_rate_init(&sta->deflink); /* If it fails, maybe we raced another insertion? */ if (sta_info_insert_rcu(sta)) return sta_info_get(sdata, addr); return sta; } static void ieee80211_ocb_housekeeping(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; ocb_dbg(sdata, "Running ocb housekeeping\n"); ieee80211_sta_expire(sdata, IEEE80211_OCB_PEER_INACTIVITY_LIMIT); mod_timer(&ifocb->housekeeping_timer, round_jiffies(jiffies + IEEE80211_OCB_HOUSEKEEPING_INTERVAL)); } void ieee80211_ocb_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (ifocb->joined != true) return; spin_lock_bh(&ifocb->incomplete_lock); while (!list_empty(&ifocb->incomplete_stations)) { sta = list_first_entry(&ifocb->incomplete_stations, struct sta_info, list); list_del(&sta->list); spin_unlock_bh(&ifocb->incomplete_lock); ieee80211_ocb_finish_sta(sta); rcu_read_unlock(); spin_lock_bh(&ifocb->incomplete_lock); } spin_unlock_bh(&ifocb->incomplete_lock); if (test_and_clear_bit(OCB_WORK_HOUSEKEEPING, &ifocb->wrkq_flags)) ieee80211_ocb_housekeeping(sdata); } static void ieee80211_ocb_housekeeping_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = timer_container_of(sdata, t, u.ocb.housekeeping_timer); struct ieee80211_local *local = sdata->local; struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; set_bit(OCB_WORK_HOUSEKEEPING, &ifocb->wrkq_flags); wiphy_work_queue(local->hw.wiphy, &sdata->work); } void ieee80211_ocb_setup_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; timer_setup(&ifocb->housekeeping_timer, ieee80211_ocb_housekeeping_timer, 0); INIT_LIST_HEAD(&ifocb->incomplete_stations); spin_lock_init(&ifocb->incomplete_lock); } int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata, struct ocb_setup *setup) { struct ieee80211_chan_req chanreq = { .oper = setup->chandef }; struct ieee80211_local *local = sdata->local; struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; u64 changed = BSS_CHANGED_OCB | BSS_CHANGED_BSSID; int err; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (ifocb->joined == true) return -EINVAL; sdata->deflink.operating_11g_mode = true; sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; 
sdata->deflink.needed_rx_chains = sdata->local->rx_chains; err = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_SHARED); if (err) return err; ieee80211_bss_info_change_notify(sdata, changed); ifocb->joined = true; set_bit(OCB_WORK_HOUSEKEEPING, &ifocb->wrkq_flags); wiphy_work_queue(local->hw.wiphy, &sdata->work); netif_carrier_on(sdata->dev); return 0; } int ieee80211_ocb_leave(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; struct ieee80211_local *local = sdata->local; struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); ifocb->joined = false; sta_info_flush(sdata, -1); spin_lock_bh(&ifocb->incomplete_lock); while (!list_empty(&ifocb->incomplete_stations)) { sta = list_first_entry(&ifocb->incomplete_stations, struct sta_info, list); list_del(&sta->list); spin_unlock_bh(&ifocb->incomplete_lock); sta_info_free(local, sta); spin_lock_bh(&ifocb->incomplete_lock); } spin_unlock_bh(&ifocb->incomplete_lock); netif_carrier_off(sdata->dev); clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_OCB); ieee80211_link_release_channel(&sdata->deflink); skb_queue_purge(&sdata->skb_queue); timer_delete_sync(&sdata->u.ocb.housekeeping_timer); /* If the timer fired while we waited for it, it will have * requeued the work. Now the work will be running again * but will not rearm the timer again because it checks * whether we are connected to the network or not -- at this * point we shouldn't be anymore. */ return 0; }
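ieee80211_ocb_housekeeping_timer() above only sets the OCB_WORK_HOUSEKEEPING bit and queues the interface work; expiring inactive peers happens later in ieee80211_ocb_work(). A minimal stand-alone sketch of that set-a-flag-then-drain pattern follows, using C11 atomics and hypothetical names in place of the kernel's set_bit()/test_and_clear_bit() and wiphy_work_queue(); it is an illustration of the idea, not the mac80211 code.

/* Sketch of the "timer sets a flag, worker consumes it" pattern.
 * Hypothetical stand-in for the kernel bitops + work queueing used above. */
#include <stdatomic.h>
#include <stdio.h>

#define WORK_HOUSEKEEPING 0x1u

static atomic_uint wrkq_flags;

static void timer_fired(void)          /* timer callback: stays cheap, only flags work */
{
        atomic_fetch_or(&wrkq_flags, WORK_HOUSEKEEPING);
        /* ...the real code queues the interface worker here... */
}

static void worker(void)               /* runs later, in a context that may sleep */
{
        unsigned int flags = atomic_exchange(&wrkq_flags, 0);

        if (flags & WORK_HOUSEKEEPING)
                printf("housekeeping: expire stale peers, re-arm the timer\n");
}

int main(void)
{
        timer_fired();
        worker();
        return 0;
}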
// SPDX-License-Identifier: GPL-2.0-or-later /* * IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173 * * Copyright (C)2003 USAGI/WIDE Project * * Author Mitsuru KANDA <mk@linux-ipv6.org> */ /* * [Memo] * * Outbound: * The compression of IP datagram MUST be done before AH/ESP processing, * fragmentation, and the addition of Hop-by-Hop/Routing header. * * Inbound: * The decompression of IP datagram MUST be done after the reassembly, * AH/ESP processing. */ #define pr_fmt(fmt) "IPv6: " fmt #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/ipcomp.h> #include <linux/crypto.h> #include <linux/err.h> #include <linux/pfkeyv2.h> #include <linux/random.h> #include <linux/percpu.h> #include <linux/smp.h> #include <linux/list.h> #include <linux/vmalloc.h> #include <linux/rtnetlink.h> #include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/mutex.h> static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); __be32 spi; const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; struct ip_comp_hdr *ipcomph = (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return 0; spi = htonl(ntohs(ipcomph->cpi)); x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); else ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; } static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct net *net = xs_net(x); struct xfrm_state *t = NULL; t = xfrm_state_alloc(net); if (!t) goto out; t->id.proto = IPPROTO_IPV6; t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr); if (!t->id.spi) goto error; memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; t->props.mode = x->props.mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); memcpy(&t->mark, &x->mark, sizeof(t->mark)); t->if_id = x->if_id; if (xfrm_init_state(t)) goto error; atomic_set(&t->tunnel_users, 1); out: return t; error: t->km.state = XFRM_STATE_DEAD; xfrm_state_put(t); t = NULL; goto out; } static int ipcomp6_tunnel_attach(struct xfrm_state *x) { struct net *net = xs_net(x); int err = 0; struct xfrm_state *t = NULL; __be32 spi; u32 mark = x->mark.m & x->mark.v; spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t
*)&x->props.saddr); if (spi) t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); if (!t) { err = -EINVAL; goto out; } xfrm_state_insert(t); xfrm_state_hold(t); } x->tunnel = t; atomic_inc(&t->tunnel_users); out: return err; } static int ipcomp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { case XFRM_MODE_TRANSPORT: break; case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); break; default: NL_SET_ERR_MSG(extack, "Unsupported XFRM mode for IPcomp"); goto out; } err = ipcomp_init_state(x, extack); if (err) goto out; if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) { NL_SET_ERR_MSG(extack, "Kernel error: failed to initialize the associated state"); goto out; } } err = 0; out: return err; } static int ipcomp6_rcv_cb(struct sk_buff *skb, int err) { return 0; } static const struct xfrm_type ipcomp6_type = { .owner = THIS_MODULE, .proto = IPPROTO_COMP, .init_state = ipcomp6_init_state, .destructor = ipcomp_destroy, .input = ipcomp_input, .output = ipcomp_output, }; static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, .input_handler = xfrm_input, .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, .priority = 0, }; static int __init ipcomp6_init(void) { if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) { pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); return -EAGAIN; } return 0; } static void __exit ipcomp6_fini(void) { if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0) pr_info("%s: can't remove protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); } module_init(ipcomp6_init); module_exit(ipcomp6_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173"); MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>"); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP);
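ipcomp6_err() above locates the affected state by widening the 16-bit IPComp CPI into the 32-bit SPI form that xfrm_state_lookup() expects: spi = htonl(ntohs(ipcomph->cpi)). The small user-space check below demonstrates that conversion with the ordinary byte-order helpers; it only illustrates the arithmetic and is not kernel code.

/* Demonstrates the CPI -> SPI widening used for the xfrm state lookup. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint16_t cpi = htons(0x1234);        /* CPI as it appears on the wire */
        uint32_t spi = htonl(ntohs(cpi));    /* same value, zero-extended, network order */

        printf("cpi=0x%04x spi=0x%08x\n", (unsigned)ntohs(cpi), (unsigned)ntohl(spi));
        return 0;   /* prints: cpi=0x1234 spi=0x00001234 */
}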
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_REJECT_H #define _NF_REJECT_H #include <linux/types.h> #include <uapi/linux/in.h> static inline bool nf_reject_verify_csum(struct sk_buff *skb, int dataoff, __u8 proto) { /* Skip protocols that don't use 16-bit one's complement checksum * of the entire payload. */ switch (proto) { /* Protocols with optional checksums. */ case IPPROTO_UDP: { const struct udphdr *udp_hdr; struct udphdr _udp_hdr; udp_hdr = skb_header_pointer(skb, dataoff, sizeof(_udp_hdr), &_udp_hdr); if (!udp_hdr || udp_hdr->check) return true; return false; } case IPPROTO_GRE: /* Protocols with other integrity checks. */ case IPPROTO_AH: case IPPROTO_ESP: case IPPROTO_SCTP: /* Protocols with partial checksums. */ case IPPROTO_UDPLITE: case IPPROTO_DCCP: return false; } return true; } #endif /* _NF_REJECT_H */
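nf_reject_verify_csum() encodes a single decision: whether the transport checksum is worth validating before a reject is generated. UDP is the special case because its checksum is optional over IPv4 (a zero check field means no checksum is present). The stand-alone sketch below mirrors that decision table; udp_has_checksum() is a hypothetical stand-in for the skb_header_pointer() read done by the real helper.

/* Mirror of the nf_reject_verify_csum() decision table, for illustration only. */
#include <netinet/in.h>   /* IPPROTO_* */
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in: does the UDP header carry a non-zero checksum? */
static bool udp_has_checksum(unsigned short check_field)
{
        return check_field != 0;
}

static bool verify_csum(int proto, unsigned short udp_check)
{
        switch (proto) {
        case IPPROTO_UDP:            /* checksum is optional */
                return udp_has_checksum(udp_check);
        case IPPROTO_GRE:            /* other integrity checks */
        case IPPROTO_AH:
        case IPPROTO_ESP:
        case IPPROTO_SCTP:
        case IPPROTO_UDPLITE:        /* partial checksums */
        case IPPROTO_DCCP:
                return false;
        default:                     /* e.g. TCP: always verify */
                return true;
        }
}

int main(void)
{
        printf("UDP, check=0   -> %d\n", verify_csum(IPPROTO_UDP, 0));       /* 0: skip */
        printf("UDP, check!=0  -> %d\n", verify_csum(IPPROTO_UDP, 0xbeef));  /* 1: verify */
        printf("TCP            -> %d\n", verify_csum(IPPROTO_TCP, 0));       /* 1: verify */
        printf("SCTP           -> %d\n", verify_csum(IPPROTO_SCTP, 0));      /* 0: skip */
        return 0;
}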
// SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001-2003 Intel Corp. * * This file is part of the SCTP kernel implementation * * These functions implement the sctp_outq class. The outqueue handles * bundling and queueing of outgoing SCTP chunks. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Perry Melange <pmelange@null.cc.uic.edu> * Xingang Guo <xingang.guo@intel.com> * Hui Huang <hui.huang@nokia.com> * Sridhar Samudrala <sri@us.ibm.com> * Jon Grimm <jgrimm@us.ibm.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/list.h> /* For struct list_head */ #include <linux/socket.h> #include <linux/ip.h> #include <linux/slab.h> #include <net/sock.h> /* For skb_set_owner_w */ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/stream_sched.h> #include <trace/events/sctp.h> /* Declare internal functions here.
*/ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn); static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, union sctp_addr *saddr, struct sctp_sackhdr *sack, __u32 *highest_new_tsn); static void sctp_mark_missing(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, __u32 highest_new_tsn, int count_of_newacks); static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); /* Add data to the front of the queue. */ static inline void sctp_outq_head_data(struct sctp_outq *q, struct sctp_chunk *ch) { struct sctp_stream_out_ext *oute; __u16 stream; list_add(&ch->list, &q->out_chunk_list); q->out_qlen += ch->skb->len; stream = sctp_chunk_stream_no(ch); oute = SCTP_SO(&q->asoc->stream, stream)->ext; list_add(&ch->stream_list, &oute->outq); } /* Take data from the front of the queue. */ static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q) { return q->sched->dequeue(q); } /* Add data chunk to the end of the queue. */ static inline void sctp_outq_tail_data(struct sctp_outq *q, struct sctp_chunk *ch) { struct sctp_stream_out_ext *oute; __u16 stream; list_add_tail(&ch->list, &q->out_chunk_list); q->out_qlen += ch->skb->len; stream = sctp_chunk_stream_no(ch); oute = SCTP_SO(&q->asoc->stream, stream)->ext; list_add_tail(&ch->stream_list, &oute->outq); } /* * SFR-CACC algorithm: * D) If count_of_newacks is greater than or equal to 2 * and t was not sent to the current primary then the * sender MUST NOT increment missing report count for t. */ static inline int sctp_cacc_skip_3_1_d(struct sctp_transport *primary, struct sctp_transport *transport, int count_of_newacks) { if (count_of_newacks >= 2 && transport != primary) return 1; return 0; } /* * SFR-CACC algorithm: * F) If count_of_newacks is less than 2, let d be the * destination to which t was sent. If cacc_saw_newack * is 0 for destination d, then the sender MUST NOT * increment missing report count for t. */ static inline int sctp_cacc_skip_3_1_f(struct sctp_transport *transport, int count_of_newacks) { if (count_of_newacks < 2 && (transport && !transport->cacc.cacc_saw_newack)) return 1; return 0; } /* * SFR-CACC algorithm: * 3.1) If CYCLING_CHANGEOVER is 0, the sender SHOULD * execute steps C, D, F. * * C has been implemented in sctp_outq_sack */ static inline int sctp_cacc_skip_3_1(struct sctp_transport *primary, struct sctp_transport *transport, int count_of_newacks) { if (!primary->cacc.cycling_changeover) { if (sctp_cacc_skip_3_1_d(primary, transport, count_of_newacks)) return 1; if (sctp_cacc_skip_3_1_f(transport, count_of_newacks)) return 1; return 0; } return 0; } /* * SFR-CACC algorithm: * 3.2) Else if CYCLING_CHANGEOVER is 1, and t is less * than next_tsn_at_change of the current primary, then * the sender MUST NOT increment missing report count * for t. */ static inline int sctp_cacc_skip_3_2(struct sctp_transport *primary, __u32 tsn) { if (primary->cacc.cycling_changeover && TSN_lt(tsn, primary->cacc.next_tsn_at_change)) return 1; return 0; } /* * SFR-CACC algorithm: * 3) If the missing report count for TSN t is to be * incremented according to [RFC2960] and * [SCTP_STEWART-2002], and CHANGEOVER_ACTIVE is set, * then the sender MUST further execute steps 3.1 and * 3.2 to determine if the missing report count for * TSN t SHOULD NOT be incremented. 
* * 3.3) If 3.1 and 3.2 do not dictate that the missing * report count for t should not be incremented, then * the sender SHOULD increment missing report count for * t (according to [RFC2960] and [SCTP_STEWART_2002]). */ static inline int sctp_cacc_skip(struct sctp_transport *primary, struct sctp_transport *transport, int count_of_newacks, __u32 tsn) { if (primary->cacc.changeover_active && (sctp_cacc_skip_3_1(primary, transport, count_of_newacks) || sctp_cacc_skip_3_2(primary, tsn))) return 1; return 0; } /* Initialize an existing sctp_outq. This does the boring stuff. * You still need to define handlers if you really want to DO * something with this structure... */ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) { memset(q, 0, sizeof(struct sctp_outq)); q->asoc = asoc; INIT_LIST_HEAD(&q->out_chunk_list); INIT_LIST_HEAD(&q->control_chunk_list); INIT_LIST_HEAD(&q->retransmit); INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); sctp_sched_set_sched(asoc, sctp_sk(asoc->base.sk)->default_ss); } /* Free the outqueue structure and any related pending chunks. */ static void __sctp_outq_teardown(struct sctp_outq *q) { struct sctp_transport *transport; struct list_head *lchunk, *temp; struct sctp_chunk *chunk, *tmp; /* Throw away unacknowledged chunks. */ list_for_each_entry(transport, &q->asoc->peer.transport_addr_list, transports) { while ((lchunk = sctp_list_dequeue(&transport->transmitted)) != NULL) { chunk = list_entry(lchunk, struct sctp_chunk, transmitted_list); /* Mark as part of a failed message. */ sctp_chunk_fail(chunk, q->error); sctp_chunk_free(chunk); } } /* Throw away chunks that have been gap ACKed. */ list_for_each_safe(lchunk, temp, &q->sacked) { list_del_init(lchunk); chunk = list_entry(lchunk, struct sctp_chunk, transmitted_list); sctp_chunk_fail(chunk, q->error); sctp_chunk_free(chunk); } /* Throw away any chunks in the retransmit queue. */ list_for_each_safe(lchunk, temp, &q->retransmit) { list_del_init(lchunk); chunk = list_entry(lchunk, struct sctp_chunk, transmitted_list); sctp_chunk_fail(chunk, q->error); sctp_chunk_free(chunk); } /* Throw away any chunks that are in the abandoned queue. */ list_for_each_safe(lchunk, temp, &q->abandoned) { list_del_init(lchunk); chunk = list_entry(lchunk, struct sctp_chunk, transmitted_list); sctp_chunk_fail(chunk, q->error); sctp_chunk_free(chunk); } /* Throw away any leftover data chunks. */ while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { sctp_sched_dequeue_done(q, chunk); /* Mark as send failure. */ sctp_chunk_fail(chunk, q->error); sctp_chunk_free(chunk); } /* Throw away any leftover control chunks. */ list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) { list_del_init(&chunk->list); sctp_chunk_free(chunk); } } void sctp_outq_teardown(struct sctp_outq *q) { __sctp_outq_teardown(q); sctp_outq_init(q->asoc, q); } /* Free the outqueue structure and any related pending chunks. */ void sctp_outq_free(struct sctp_outq *q) { /* Throw away leftover chunks. */ __sctp_outq_teardown(q); } /* Put a new chunk in an sctp_outq. */ void sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) { struct net *net = q->asoc->base.net; pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk, chunk && chunk->chunk_hdr ? sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : "illegal chunk"); /* If it is data, queue it up, otherwise, send it * immediately. 
*/ if (sctp_chunk_is_data(chunk)) { pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n", __func__, q, chunk, chunk && chunk->chunk_hdr ? sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : "illegal chunk"); sctp_outq_tail_data(q, chunk); if (chunk->asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) chunk->asoc->sent_cnt_removable++; if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); else SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); } else { list_add_tail(&chunk->list, &q->control_chunk_list); SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } if (!q->cork) sctp_outq_flush(q, 0, gfp); } /* Insert a chunk into the sorted list based on the TSNs. The retransmit list * and the abandoned list are in ascending order. */ static void sctp_insert_list(struct list_head *head, struct list_head *new) { struct list_head *pos; struct sctp_chunk *nchunk, *lchunk; __u32 ntsn, ltsn; int done = 0; nchunk = list_entry(new, struct sctp_chunk, transmitted_list); ntsn = ntohl(nchunk->subh.data_hdr->tsn); list_for_each(pos, head) { lchunk = list_entry(pos, struct sctp_chunk, transmitted_list); ltsn = ntohl(lchunk->subh.data_hdr->tsn); if (TSN_lt(ntsn, ltsn)) { list_add(new, pos->prev); done = 1; break; } } if (!done) list_add_tail(new, head); } static int sctp_prsctp_prune_sent(struct sctp_association *asoc, struct sctp_sndrcvinfo *sinfo, struct list_head *queue, int msg_len) { struct sctp_chunk *chk, *temp; list_for_each_entry_safe(chk, temp, queue, transmitted_list) { struct sctp_stream_out *streamout; if (!chk->msg->abandoned && (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)) continue; chk->msg->abandoned = 1; list_del_init(&chk->transmitted_list); sctp_insert_list(&asoc->outqueue.abandoned, &chk->transmitted_list); streamout = SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream); asoc->sent_cnt_removable--; asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; if (queue != &asoc->outqueue.retransmit && !chk->tsn_gap_acked) { if (chk->transport) chk->transport->flight_size -= sctp_data_size(chk); asoc->outqueue.outstanding_bytes -= sctp_data_size(chk); } msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk); if (msg_len <= 0) break; } return msg_len; } static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, struct sctp_sndrcvinfo *sinfo, int msg_len) { struct sctp_outq *q = &asoc->outqueue; struct sctp_chunk *chk, *temp; struct sctp_stream_out *sout; q->sched->unsched_all(&asoc->stream); list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { if (!chk->msg->abandoned && (!(chk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG) || !SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)) continue; chk->msg->abandoned = 1; sctp_sched_dequeue_common(q, chk); asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; sout = SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream); sout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; /* clear out_curr if all frag chunks are pruned */ if (asoc->stream.out_curr == sout && list_is_last(&chk->frag_list, &chk->msg->chunks)) asoc->stream.out_curr = NULL; msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk); sctp_chunk_free(chk); if (msg_len <= 0) break; } q->sched->sched_all(&asoc->stream); return msg_len; } /* Abandon the chunks according their priorities */ void sctp_prsctp_prune(struct sctp_association *asoc, struct 
sctp_sndrcvinfo *sinfo, int msg_len) { struct sctp_transport *transport; if (!asoc->peer.prsctp_capable || !asoc->sent_cnt_removable) return; msg_len = sctp_prsctp_prune_sent(asoc, sinfo, &asoc->outqueue.retransmit, msg_len); if (msg_len <= 0) return; list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { msg_len = sctp_prsctp_prune_sent(asoc, sinfo, &transport->transmitted, msg_len); if (msg_len <= 0) return; } sctp_prsctp_prune_unsent(asoc, sinfo, msg_len); } /* Mark all the eligible packets on a transport for retransmission. */ void sctp_retransmit_mark(struct sctp_outq *q, struct sctp_transport *transport, __u8 reason) { struct list_head *lchunk, *ltemp; struct sctp_chunk *chunk; /* Walk through the specified transmitted queue. */ list_for_each_safe(lchunk, ltemp, &transport->transmitted) { chunk = list_entry(lchunk, struct sctp_chunk, transmitted_list); /* If the chunk is abandoned, move it to abandoned list. */ if (sctp_chunk_abandoned(chunk)) { list_del_init(lchunk); sctp_insert_list(&q->abandoned, lchunk); /* If this chunk has not been previousely acked, * stop considering it 'outstanding'. Our peer * will most likely never see it since it will * not be retransmitted */ if (!chunk->tsn_gap_acked) { if (chunk->transport) chunk->transport->flight_size -= sctp_data_size(chunk); q->outstanding_bytes -= sctp_data_size(chunk); q->asoc->peer.rwnd += sctp_data_size(chunk); } continue; } /* If we are doing retransmission due to a timeout or pmtu * discovery, only the chunks that are not yet acked should * be added to the retransmit queue. */ if ((reason == SCTP_RTXR_FAST_RTX && (chunk->fast_retransmit == SCTP_NEED_FRTX)) || (reason != SCTP_RTXR_FAST_RTX && !chunk->tsn_gap_acked)) { /* RFC 2960 6.2.1 Processing a Received SACK * * C) Any time a DATA chunk is marked for * retransmission (via either T3-rtx timer expiration * (Section 6.3.3) or via fast retransmit * (Section 7.2.4)), add the data size of those * chunks to the rwnd. */ q->asoc->peer.rwnd += sctp_data_size(chunk); q->outstanding_bytes -= sctp_data_size(chunk); if (chunk->transport) transport->flight_size -= sctp_data_size(chunk); /* sctpimpguide-05 Section 2.8.2 * M5) If a T3-rtx timer expires, the * 'TSN.Missing.Report' of all affected TSNs is set * to 0. */ chunk->tsn_missing_report = 0; /* If a chunk that is being used for RTT measurement * has to be retransmitted, we cannot use this chunk * anymore for RTT measurements. Reset rto_pending so * that a new RTT measurement is started when a new * data chunk is sent. */ if (chunk->rtt_in_progress) { chunk->rtt_in_progress = 0; transport->rto_pending = 0; } /* Move the chunk to the retransmit queue. The chunks * on the retransmit queue are always kept in order. */ list_del_init(lchunk); sctp_insert_list(&q->retransmit, lchunk); } } pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d, " "flight_size:%d, pba:%d\n", __func__, transport, reason, transport->cwnd, transport->ssthresh, transport->flight_size, transport->partial_bytes_acked); } /* Mark all the eligible packets on a transport for retransmission and force * one packet out. */ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, enum sctp_retransmit_reason reason) { struct net *net = q->asoc->base.net; switch (reason) { case SCTP_RTXR_T3_RTX: SCTP_INC_STATS(net, SCTP_MIB_T3_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); /* Update the retran path if the T3-rtx timer has expired for * the current retran path. 
*/ if (transport == transport->asoc->peer.retran_path) sctp_assoc_update_retran_path(transport->asoc); transport->asoc->rtx_data_chunks += transport->asoc->unack_data; if (transport->pl.state == SCTP_PL_COMPLETE && transport->asoc->unack_data) sctp_transport_reset_probe_timer(transport); break; case SCTP_RTXR_FAST_RTX: SCTP_INC_STATS(net, SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); q->fast_rtx = 1; break; case SCTP_RTXR_PMTUD: SCTP_INC_STATS(net, SCTP_MIB_PMTUD_RETRANSMITS); break; case SCTP_RTXR_T1_RTX: SCTP_INC_STATS(net, SCTP_MIB_T1_RETRANSMITS); transport->asoc->init_retries++; break; default: BUG(); } sctp_retransmit_mark(q, transport, reason); /* PR-SCTP A5) Any time the T3-rtx timer expires, on any destination, * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by * following the procedures outlined in C1 - C5. */ if (reason == SCTP_RTXR_T3_RTX) q->asoc->stream.si->generate_ftsn(q, q->asoc->ctsn_ack_point); /* Flush the queues only on timeout, since fast_rtx is only * triggered during sack processing and the queue * will be flushed at the end. */ if (reason != SCTP_RTXR_FAST_RTX) sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC); } /* * Transmit DATA chunks on the retransmit queue. Upon return from * __sctp_outq_flush_rtx() the packet 'pkt' may contain chunks which * need to be transmitted by the caller. * We assume that pkt->transport has already been set. * * The return value is a normal kernel error return value. */ static int __sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, int rtx_timeout, int *start_timer, gfp_t gfp) { struct sctp_transport *transport = pkt->transport; struct sctp_chunk *chunk, *chunk1; struct list_head *lqueue; enum sctp_xmit status; int error = 0; int timer = 0; int done = 0; int fast_rtx; lqueue = &q->retransmit; fast_rtx = q->fast_rtx; /* This loop handles time-out retransmissions, fast retransmissions, * and retransmissions due to opening of whindow. * * RFC 2960 6.3.3 Handle T3-rtx Expiration * * E3) Determine how many of the earliest (i.e., lowest TSN) * outstanding DATA chunks for the address for which the * T3-rtx has expired will fit into a single packet, subject * to the MTU constraint for the path corresponding to the * destination transport address to which the retransmission * is being sent (this may be different from the address for * which the timer expires [see Section 6.4]). Call this value * K. Bundle and retransmit those K DATA chunks in a single * packet to the destination endpoint. * * [Just to be painfully clear, if we are retransmitting * because a timeout just happened, we should send only ONE * packet of retransmitted data.] * * For fast retransmissions we also send only ONE packet. However, * if we are just flushing the queue due to open window, we'll * try to send as much as possible. */ list_for_each_entry_safe(chunk, chunk1, lqueue, transmitted_list) { /* If the chunk is abandoned, move it to abandoned list. */ if (sctp_chunk_abandoned(chunk)) { list_del_init(&chunk->transmitted_list); sctp_insert_list(&q->abandoned, &chunk->transmitted_list); continue; } /* Make sure that Gap Acked TSNs are not retransmitted. A * simple approach is just to move such TSNs out of the * way and into a 'transmitted' queue and skip to the * next chunk. 
*/ if (chunk->tsn_gap_acked) { list_move_tail(&chunk->transmitted_list, &transport->transmitted); continue; } /* If we are doing fast retransmit, ignore non-fast_rtransmit * chunks */ if (fast_rtx && !chunk->fast_retransmit) continue; redo: /* Attempt to append this chunk to the packet. */ status = sctp_packet_append_chunk(pkt, chunk); switch (status) { case SCTP_XMIT_PMTU_FULL: if (!pkt->has_data && !pkt->has_cookie_echo) { /* If this packet did not contain DATA then * retransmission did not happen, so do it * again. We'll ignore the error here since * control chunks are already freed so there * is nothing we can do. */ sctp_packet_transmit(pkt, gfp); goto redo; } /* Send this packet. */ error = sctp_packet_transmit(pkt, gfp); /* If we are retransmitting, we should only * send a single packet. * Otherwise, try appending this chunk again. */ if (rtx_timeout || fast_rtx) done = 1; else goto redo; /* Bundle next chunk in the next round. */ break; case SCTP_XMIT_RWND_FULL: /* Send this packet. */ error = sctp_packet_transmit(pkt, gfp); /* Stop sending DATA as there is no more room * at the receiver. */ done = 1; break; case SCTP_XMIT_DELAY: /* Send this packet. */ error = sctp_packet_transmit(pkt, gfp); /* Stop sending DATA because of nagle delay. */ done = 1; break; default: /* The append was successful, so add this chunk to * the transmitted list. */ list_move_tail(&chunk->transmitted_list, &transport->transmitted); /* Mark the chunk as ineligible for fast retransmit * after it is retransmitted. */ if (chunk->fast_retransmit == SCTP_NEED_FRTX) chunk->fast_retransmit = SCTP_DONT_FRTX; q->asoc->stats.rtxchunks++; break; } /* Set the timer if there were no errors */ if (!error && !timer) timer = 1; if (done) break; } /* If we are here due to a retransmit timeout or a fast * retransmit and if there are any chunks left in the retransmit * queue that could not fit in the PMTU sized packet, they need * to be marked as ineligible for a subsequent fast retransmit. */ if (rtx_timeout || fast_rtx) { list_for_each_entry(chunk1, lqueue, transmitted_list) { if (chunk1->fast_retransmit == SCTP_NEED_FRTX) chunk1->fast_retransmit = SCTP_DONT_FRTX; } } *start_timer = timer; /* Clear fast retransmit hint */ if (fast_rtx) q->fast_rtx = 0; return error; } /* Cork the outqueue so queued chunks are really queued. */ void sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp) { if (q->cork) q->cork = 0; sctp_outq_flush(q, 0, gfp); } static int sctp_packet_singleton(struct sctp_transport *transport, struct sctp_chunk *chunk, gfp_t gfp) { const struct sctp_association *asoc = transport->asoc; const __u16 sport = asoc->base.bind_addr.port; const __u16 dport = asoc->peer.port; const __u32 vtag = asoc->peer.i.init_tag; struct sctp_packet singleton; sctp_packet_init(&singleton, transport, sport, dport); sctp_packet_config(&singleton, vtag, 0); if (sctp_packet_append_chunk(&singleton, chunk) != SCTP_XMIT_OK) { list_del_init(&chunk->list); sctp_chunk_free(chunk); return -ENOMEM; } return sctp_packet_transmit(&singleton, gfp); } /* Struct to hold the context during sctp outq flush */ struct sctp_flush_ctx { struct sctp_outq *q; /* Current transport being used. It's NOT the same as curr active one */ struct sctp_transport *transport; /* These transports have chunks to send. 
*/ struct list_head transport_list; struct sctp_association *asoc; /* Packet on the current transport above */ struct sctp_packet *packet; gfp_t gfp; }; /* transport: current transport */ static void sctp_outq_select_transport(struct sctp_flush_ctx *ctx, struct sctp_chunk *chunk) { struct sctp_transport *new_transport = chunk->transport; if (!new_transport) { if (!sctp_chunk_is_data(chunk)) { /* If we have a prior transport pointer, see if * the destination address of the chunk * matches the destination address of the * current transport. If not a match, then * try to look up the transport with a given * destination address. We do this because * after processing ASCONFs, we may have new * transports created. */ if (ctx->transport && sctp_cmp_addr_exact(&chunk->dest, &ctx->transport->ipaddr)) new_transport = ctx->transport; else new_transport = sctp_assoc_lookup_paddr(ctx->asoc, &chunk->dest); } /* if we still don't have a new transport, then * use the current active path. */ if (!new_transport) new_transport = ctx->asoc->peer.active_path; } else { __u8 type; switch (new_transport->state) { case SCTP_INACTIVE: case SCTP_UNCONFIRMED: case SCTP_PF: /* If the chunk is Heartbeat or Heartbeat Ack, * send it to chunk->transport, even if it's * inactive. * * 3.3.6 Heartbeat Acknowledgement: * ... * A HEARTBEAT ACK is always sent to the source IP * address of the IP datagram containing the * HEARTBEAT chunk to which this ack is responding. * ... * * ASCONF_ACKs also must be sent to the source. */ type = chunk->chunk_hdr->type; if (type != SCTP_CID_HEARTBEAT && type != SCTP_CID_HEARTBEAT_ACK && type != SCTP_CID_ASCONF_ACK) new_transport = ctx->asoc->peer.active_path; break; default: break; } } /* Are we switching transports? Take care of transport locks. */ if (new_transport != ctx->transport) { ctx->transport = new_transport; ctx->packet = &ctx->transport->packet; if (list_empty(&ctx->transport->send_ready)) list_add_tail(&ctx->transport->send_ready, &ctx->transport_list); sctp_packet_config(ctx->packet, ctx->asoc->peer.i.init_tag, ctx->asoc->peer.ecn_capable); /* We've switched transports, so apply the * Burst limit to the new transport. */ sctp_transport_burst_limited(ctx->transport); } } static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx) { struct sctp_chunk *chunk, *tmp; enum sctp_xmit status; int one_packet, error; list_for_each_entry_safe(chunk, tmp, &ctx->q->control_chunk_list, list) { one_packet = 0; /* RFC 5061, 5.3 * F1) This means that until such time as the ASCONF * containing the add is acknowledged, the sender MUST * NOT use the new IP address as a source for ANY SCTP * packet except on carrying an ASCONF Chunk. */ if (ctx->asoc->src_out_of_asoc_ok && chunk->chunk_hdr->type != SCTP_CID_ASCONF) continue; list_del_init(&chunk->list); /* Pick the right transport to use. Should always be true for * the first chunk as we don't have a transport by then. */ sctp_outq_select_transport(ctx, chunk); switch (chunk->chunk_hdr->type) { /* 6.10 Bundling * ... * An endpoint MUST NOT bundle INIT, INIT ACK or SHUTDOWN * COMPLETE with any other chunks. [Send them immediately.] */ case SCTP_CID_INIT: case SCTP_CID_INIT_ACK: case SCTP_CID_SHUTDOWN_COMPLETE: error = sctp_packet_singleton(ctx->transport, chunk, ctx->gfp); if (error < 0) { ctx->asoc->base.sk->sk_err = -error; return; } ctx->asoc->stats.octrlchunks++; break; case SCTP_CID_ABORT: if (sctp_test_T_bit(chunk)) ctx->packet->vtag = ctx->asoc->c.my_vtag; fallthrough; /* The following chunks are "response" chunks, i.e. 
* they are generated in response to something we * received. If we are sending these, then we can * send only 1 packet containing these chunks. */ case SCTP_CID_HEARTBEAT_ACK: case SCTP_CID_SHUTDOWN_ACK: case SCTP_CID_COOKIE_ACK: case SCTP_CID_COOKIE_ECHO: case SCTP_CID_ERROR: case SCTP_CID_ECN_CWR: case SCTP_CID_ASCONF_ACK: one_packet = 1; fallthrough; case SCTP_CID_HEARTBEAT: if (chunk->pmtu_probe) { error = sctp_packet_singleton(ctx->transport, chunk, ctx->gfp); if (!error) ctx->asoc->stats.octrlchunks++; break; } fallthrough; case SCTP_CID_SACK: case SCTP_CID_SHUTDOWN: case SCTP_CID_ECN_ECNE: case SCTP_CID_ASCONF: case SCTP_CID_FWD_TSN: case SCTP_CID_I_FWD_TSN: case SCTP_CID_RECONF: status = sctp_packet_transmit_chunk(ctx->packet, chunk, one_packet, ctx->gfp); if (status != SCTP_XMIT_OK) { /* put the chunk back */ list_add(&chunk->list, &ctx->q->control_chunk_list); break; } ctx->asoc->stats.octrlchunks++; /* PR-SCTP C5) If a FORWARD TSN is sent, the * sender MUST assure that at least one T3-rtx * timer is running. */ if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN || chunk->chunk_hdr->type == SCTP_CID_I_FWD_TSN) { sctp_transport_reset_t3_rtx(ctx->transport); ctx->transport->last_time_sent = jiffies; } if (chunk == ctx->asoc->strreset_chunk) sctp_transport_reset_reconf_timer(ctx->transport); break; default: /* We built a chunk with an illegal type! */ BUG(); } } } /* Returns false if new data shouldn't be sent */ static bool sctp_outq_flush_rtx(struct sctp_flush_ctx *ctx, int rtx_timeout) { int error, start_timer = 0; if (ctx->asoc->peer.retran_path->state == SCTP_UNCONFIRMED) return false; if (ctx->transport != ctx->asoc->peer.retran_path) { /* Switch transports & prepare the packet. */ ctx->transport = ctx->asoc->peer.retran_path; ctx->packet = &ctx->transport->packet; if (list_empty(&ctx->transport->send_ready)) list_add_tail(&ctx->transport->send_ready, &ctx->transport_list); sctp_packet_config(ctx->packet, ctx->asoc->peer.i.init_tag, ctx->asoc->peer.ecn_capable); } error = __sctp_outq_flush_rtx(ctx->q, ctx->packet, rtx_timeout, &start_timer, ctx->gfp); if (error < 0) ctx->asoc->base.sk->sk_err = -error; if (start_timer) { sctp_transport_reset_t3_rtx(ctx->transport); ctx->transport->last_time_sent = jiffies; } /* This can happen on COOKIE-ECHO resend. Only * one chunk can get bundled with a COOKIE-ECHO. */ if (ctx->packet->has_cookie_echo) return false; /* Don't send new data if there is still data * waiting to retransmit. */ if (!list_empty(&ctx->q->retransmit)) return false; return true; } static void sctp_outq_flush_data(struct sctp_flush_ctx *ctx, int rtx_timeout) { struct sctp_chunk *chunk; enum sctp_xmit status; /* Is it OK to send data chunks? */ switch (ctx->asoc->state) { case SCTP_STATE_COOKIE_ECHOED: /* Only allow bundling when this packet has a COOKIE-ECHO * chunk. */ if (!ctx->packet || !ctx->packet->has_cookie_echo) return; fallthrough; case SCTP_STATE_ESTABLISHED: case SCTP_STATE_SHUTDOWN_PENDING: case SCTP_STATE_SHUTDOWN_RECEIVED: break; default: /* Do nothing. */ return; } /* RFC 2960 6.1 Transmission of DATA Chunks * * C) When the time comes for the sender to transmit, * before sending new DATA chunks, the sender MUST * first transmit any outstanding DATA chunks which * are marked for retransmission (limited by the * current cwnd). */ if (!list_empty(&ctx->q->retransmit) && !sctp_outq_flush_rtx(ctx, rtx_timeout)) return; /* Apply Max.Burst limitation to the current transport in * case it will be used for new data. 
We are going to * rest it before we return, but we want to apply the limit * to the currently queued data. */ if (ctx->transport) sctp_transport_burst_limited(ctx->transport); /* Finally, transmit new packets. */ while ((chunk = sctp_outq_dequeue_data(ctx->q)) != NULL) { __u32 sid = ntohs(chunk->subh.data_hdr->stream); __u8 stream_state = SCTP_SO(&ctx->asoc->stream, sid)->state; /* Has this chunk expired? */ if (sctp_chunk_abandoned(chunk)) { sctp_sched_dequeue_done(ctx->q, chunk); sctp_chunk_fail(chunk, 0); sctp_chunk_free(chunk); continue; } if (stream_state == SCTP_STREAM_CLOSED) { sctp_outq_head_data(ctx->q, chunk); break; } sctp_outq_select_transport(ctx, chunk); pr_debug("%s: outq:%p, chunk:%p[%s], tx-tsn:0x%x skb->head:%p skb->users:%d\n", __func__, ctx->q, chunk, chunk && chunk->chunk_hdr ? sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : "illegal chunk", ntohl(chunk->subh.data_hdr->tsn), chunk->skb ? chunk->skb->head : NULL, chunk->skb ? refcount_read(&chunk->skb->users) : -1); /* Add the chunk to the packet. */ status = sctp_packet_transmit_chunk(ctx->packet, chunk, 0, ctx->gfp); if (status != SCTP_XMIT_OK) { /* We could not append this chunk, so put * the chunk back on the output queue. */ pr_debug("%s: could not transmit tsn:0x%x, status:%d\n", __func__, ntohl(chunk->subh.data_hdr->tsn), status); sctp_outq_head_data(ctx->q, chunk); break; } /* The sender is in the SHUTDOWN-PENDING state, * The sender MAY set the I-bit in the DATA * chunk header. */ if (ctx->asoc->state == SCTP_STATE_SHUTDOWN_PENDING) chunk->chunk_hdr->flags |= SCTP_DATA_SACK_IMM; if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) ctx->asoc->stats.ouodchunks++; else ctx->asoc->stats.oodchunks++; /* Only now it's safe to consider this * chunk as sent, sched-wise. */ sctp_sched_dequeue_done(ctx->q, chunk); list_add_tail(&chunk->transmitted_list, &ctx->transport->transmitted); sctp_transport_reset_t3_rtx(ctx->transport); ctx->transport->last_time_sent = jiffies; /* Only let one DATA chunk get bundled with a * COOKIE-ECHO chunk. */ if (ctx->packet->has_cookie_echo) break; } } static void sctp_outq_flush_transports(struct sctp_flush_ctx *ctx) { struct sock *sk = ctx->asoc->base.sk; struct list_head *ltransport; struct sctp_packet *packet; struct sctp_transport *t; int error = 0; while ((ltransport = sctp_list_dequeue(&ctx->transport_list)) != NULL) { t = list_entry(ltransport, struct sctp_transport, send_ready); packet = &t->packet; if (!sctp_packet_empty(packet)) { rcu_read_lock(); if (t->dst && __sk_dst_get(sk) != t->dst) { dst_hold(t->dst); sk_setup_caps(sk, t->dst); } rcu_read_unlock(); error = sctp_packet_transmit(packet, ctx->gfp); if (error < 0) ctx->q->asoc->base.sk->sk_err = -error; } /* Clear the burst limited state, if any */ sctp_transport_burst_reset(t); } } /* Try to flush an outqueue. * * Description: Send everything in q which we legally can, subject to * congestion limitations. * * Note: This function can be called from multiple contexts so appropriate * locking concerns must be made. Today we use the sock lock to protect * this function. */ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) { struct sctp_flush_ctx ctx = { .q = q, .transport = NULL, .transport_list = LIST_HEAD_INIT(ctx.transport_list), .asoc = q->asoc, .packet = NULL, .gfp = gfp, }; /* 6.10 Bundling * ... * When bundling control chunks with DATA chunks, an * endpoint MUST place control chunks first in the outbound * SCTP packet. 
The transmitter MUST transmit DATA chunks * within a SCTP packet in increasing order of TSN. * ... */ sctp_outq_flush_ctrl(&ctx); if (q->asoc->src_out_of_asoc_ok) goto sctp_flush_out; sctp_outq_flush_data(&ctx, rtx_timeout); sctp_flush_out: sctp_outq_flush_transports(&ctx); } /* Update unack_data based on the incoming SACK chunk */ static void sctp_sack_update_unack_data(struct sctp_association *assoc, struct sctp_sackhdr *sack) { union sctp_sack_variable *frags; __u16 unack_data; int i; unack_data = assoc->next_tsn - assoc->ctsn_ack_point - 1; frags = (union sctp_sack_variable *)(sack + 1); for (i = 0; i < ntohs(sack->num_gap_ack_blocks); i++) { unack_data -= ((ntohs(frags[i].gab.end) - ntohs(frags[i].gab.start) + 1)); } assoc->unack_data = unack_data; } /* This is where we REALLY process a SACK. * * Process the SACK against the outqueue. Mostly, this just frees * things off the transmitted queue. */ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) { struct sctp_association *asoc = q->asoc; struct sctp_sackhdr *sack = chunk->subh.sack_hdr; struct sctp_transport *transport; struct sctp_chunk *tchunk = NULL; struct list_head *lchunk, *transport_list, *temp; __u32 sack_ctsn, ctsn, tsn; __u32 highest_tsn, highest_new_tsn; __u32 sack_a_rwnd; unsigned int outstanding; struct sctp_transport *primary = asoc->peer.primary_path; int count_of_newacks = 0; int gap_ack_blocks; u8 accum_moved = 0; /* Grab the association's destination address list. */ transport_list = &asoc->peer.transport_addr_list; /* SCTP path tracepoint for congestion control debugging. */ if (trace_sctp_probe_path_enabled()) { list_for_each_entry(transport, transport_list, transports) trace_sctp_probe_path(transport, asoc); } sack_ctsn = ntohl(sack->cum_tsn_ack); gap_ack_blocks = ntohs(sack->num_gap_ack_blocks); asoc->stats.gapcnt += gap_ack_blocks; /* * SFR-CACC algorithm: * On receipt of a SACK the sender SHOULD execute the * following statements. * * 1) If the cumulative ack in the SACK passes next tsn_at_change * on the current primary, the CHANGEOVER_ACTIVE flag SHOULD be * cleared. The CYCLING_CHANGEOVER flag SHOULD also be cleared for * all destinations. * 2) If the SACK contains gap acks and the flag CHANGEOVER_ACTIVE * is set the receiver of the SACK MUST take the following actions: * * A) Initialize the cacc_saw_newack to 0 for all destination * addresses. * * Only bother if changeover_active is set. Otherwise, this is * totally suboptimal to do on every SACK. */ if (primary->cacc.changeover_active) { u8 clear_cycling = 0; if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { primary->cacc.changeover_active = 0; clear_cycling = 1; } if (clear_cycling || gap_ack_blocks) { list_for_each_entry(transport, transport_list, transports) { if (clear_cycling) transport->cacc.cycling_changeover = 0; if (gap_ack_blocks) transport->cacc.cacc_saw_newack = 0; } } } /* Get the highest TSN in the sack. */ highest_tsn = sack_ctsn; if (gap_ack_blocks) { union sctp_sack_variable *frags = (union sctp_sack_variable *)(sack + 1); highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end); } if (TSN_lt(asoc->highest_sacked, highest_tsn)) asoc->highest_sacked = highest_tsn; highest_new_tsn = sack_ctsn; /* Run through the retransmit queue. Credit bytes received * and free those chunks that we can. */ sctp_check_transmitted(q, &q->retransmit, NULL, NULL, sack, &highest_new_tsn); /* Run through the transmitted queue. * Credit bytes received and free those chunks which we can. * * This is a MASSIVE candidate for optimization. 
*/ list_for_each_entry(transport, transport_list, transports) { sctp_check_transmitted(q, &transport->transmitted, transport, &chunk->source, sack, &highest_new_tsn); /* * SFR-CACC algorithm: * C) Let count_of_newacks be the number of * destinations for which cacc_saw_newack is set. */ if (transport->cacc.cacc_saw_newack) count_of_newacks++; } /* Move the Cumulative TSN Ack Point if appropriate. */ if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) { asoc->ctsn_ack_point = sack_ctsn; accum_moved = 1; } if (gap_ack_blocks) { if (asoc->fast_recovery && accum_moved) highest_new_tsn = highest_tsn; list_for_each_entry(transport, transport_list, transports) sctp_mark_missing(q, &transport->transmitted, transport, highest_new_tsn, count_of_newacks); } /* Update unack_data field in the assoc. */ sctp_sack_update_unack_data(asoc, sack); ctsn = asoc->ctsn_ack_point; /* Throw away stuff rotting on the sack queue. */ list_for_each_safe(lchunk, temp, &q->sacked) { tchunk = list_entry(lchunk, struct sctp_chunk, transmitted_list); tsn = ntohl(tchunk->subh.data_hdr->tsn); if (TSN_lte(tsn, ctsn)) { list_del_init(&tchunk->transmitted_list); if (asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) asoc->sent_cnt_removable--; sctp_chunk_free(tchunk); } } /* ii) Set rwnd equal to the newly received a_rwnd minus the * number of bytes still outstanding after processing the * Cumulative TSN Ack and the Gap Ack Blocks. */ sack_a_rwnd = ntohl(sack->a_rwnd); asoc->peer.zero_window_announced = !sack_a_rwnd; outstanding = q->outstanding_bytes; if (outstanding < sack_a_rwnd) sack_a_rwnd -= outstanding; else sack_a_rwnd = 0; asoc->peer.rwnd = sack_a_rwnd; asoc->stream.si->generate_ftsn(q, sack_ctsn); pr_debug("%s: sack cumulative tsn ack:0x%x\n", __func__, sack_ctsn); pr_debug("%s: cumulative tsn ack of assoc:%p is 0x%x, " "advertised peer ack point:0x%x\n", __func__, asoc, ctsn, asoc->adv_peer_ack_point); return sctp_outq_is_empty(q); } /* Is the outqueue empty? * The queue is empty when we have not pending data, no in-flight data * and nothing pending retransmissions. */ int sctp_outq_is_empty(const struct sctp_outq *q) { return q->out_qlen == 0 && q->outstanding_bytes == 0 && list_empty(&q->retransmit); } /******************************************************************** * 2nd Level Abstractions ********************************************************************/ /* Go through a transport's transmitted list or the association's retransmit * list and move chunks that are acked by the Cumulative TSN Ack to q->sacked. * The retransmit list will not have an associated transport. * * I added coherent debug information output. --xguo * * Instead of printing 'sacked' or 'kept' for each TSN on the * transmitted_queue, we print a range: SACKED: TSN1-TSN2, TSN3, TSN4-TSN5. * KEPT TSN6-TSN7, etc. */ static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, union sctp_addr *saddr, struct sctp_sackhdr *sack, __u32 *highest_new_tsn_in_sack) { struct list_head *lchunk; struct sctp_chunk *tchunk; struct list_head tlist; __u32 tsn; __u32 sack_ctsn; __u32 rtt; __u8 restart_timer = 0; int bytes_acked = 0; int migrate_bytes = 0; bool forward_progress = false; sack_ctsn = ntohl(sack->cum_tsn_ack); INIT_LIST_HEAD(&tlist); /* The while loop will skip empty transmitted queues. 
*/ while (NULL != (lchunk = sctp_list_dequeue(transmitted_queue))) { tchunk = list_entry(lchunk, struct sctp_chunk, transmitted_list); if (sctp_chunk_abandoned(tchunk)) { /* Move the chunk to abandoned list. */ sctp_insert_list(&q->abandoned, lchunk); /* If this chunk has not been acked, stop * considering it as 'outstanding'. */ if (transmitted_queue != &q->retransmit && !tchunk->tsn_gap_acked) { if (tchunk->transport) tchunk->transport->flight_size -= sctp_data_size(tchunk); q->outstanding_bytes -= sctp_data_size(tchunk); } continue; } tsn = ntohl(tchunk->subh.data_hdr->tsn); if (sctp_acked(sack, tsn)) { /* If this queue is the retransmit queue, the * retransmit timer has already reclaimed * the outstanding bytes for this chunk, so only * count bytes associated with a transport. */ if (transport && !tchunk->tsn_gap_acked) { /* If this chunk is being used for RTT * measurement, calculate the RTT and update * the RTO using this value. * * 6.3.1 C5) Karn's algorithm: RTT measurements * MUST NOT be made using packets that were * retransmitted (and thus for which it is * ambiguous whether the reply was for the * first instance of the packet or a later * instance). */ if (!sctp_chunk_retransmitted(tchunk) && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; sctp_transport_update_rto(transport, rtt); } if (TSN_lte(tsn, sack_ctsn)) { /* * SFR-CACC algorithm: * 2) If the SACK contains gap acks * and the flag CHANGEOVER_ACTIVE is * set the receiver of the SACK MUST * take the following action: * * B) For each TSN t being acked that * has not been acked in any SACK so * far, set cacc_saw_newack to 1 for * the destination that the TSN was * sent to. */ if (sack->num_gap_ack_blocks && q->asoc->peer.primary_path->cacc. changeover_active) transport->cacc.cacc_saw_newack = 1; } } /* If the chunk hasn't been marked as ACKED, * mark it and account bytes_acked if the * chunk had a valid transport (it will not * have a transport if ASCONF had deleted it * while DATA was outstanding). */ if (!tchunk->tsn_gap_acked) { tchunk->tsn_gap_acked = 1; if (TSN_lt(*highest_new_tsn_in_sack, tsn)) *highest_new_tsn_in_sack = tsn; bytes_acked += sctp_data_size(tchunk); if (!tchunk->transport) migrate_bytes += sctp_data_size(tchunk); forward_progress = true; } if (TSN_lte(tsn, sack_ctsn)) { /* RFC 2960 6.3.2 Retransmission Timer Rules * * R3) Whenever a SACK is received * that acknowledges the DATA chunk * with the earliest outstanding TSN * for that address, restart T3-rtx * timer for that address with its * current RTO. */ restart_timer = 1; forward_progress = true; list_add_tail(&tchunk->transmitted_list, &q->sacked); } else { /* RFC2960 7.2.4, sctpimpguide-05 2.8.2 * M2) Each time a SACK arrives reporting * 'Stray DATA chunk(s)' record the highest TSN * reported as newly acknowledged, call this * value 'HighestTSNinSack'. A newly * acknowledged DATA chunk is one not * previously acknowledged in a SACK. * * When the SCTP sender of data receives a SACK * chunk that acknowledges, for the first time, * the receipt of a DATA chunk, all the still * unacknowledged DATA chunks whose TSN is * older than that newly acknowledged DATA * chunk, are qualified as 'Stray DATA chunks'. 
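 *
 * For instance (an illustrative reading of the above): if TSNs 5, 6 and 9
 * are outstanding and a SACK acknowledges TSN 9 for the first time, then
 * 'HighestTSNinSack' is 9 and the still-unacknowledged chunks 5 and 6 are
 * qualified as 'Stray DATA chunks'.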
*/ list_add_tail(lchunk, &tlist); } } else { if (tchunk->tsn_gap_acked) { pr_debug("%s: receiver reneged on data TSN:0x%x\n", __func__, tsn); tchunk->tsn_gap_acked = 0; if (tchunk->transport) bytes_acked -= sctp_data_size(tchunk); /* RFC 2960 6.3.2 Retransmission Timer Rules * * R4) Whenever a SACK is received missing a * TSN that was previously acknowledged via a * Gap Ack Block, start T3-rtx for the * destination address to which the DATA * chunk was originally * transmitted if it is not already running. */ restart_timer = 1; } list_add_tail(lchunk, &tlist); } } if (transport) { if (bytes_acked) { struct sctp_association *asoc = transport->asoc; /* We may have counted DATA that was migrated * to this transport due to DEL-IP operation. * Subtract those bytes, since the were never * send on this transport and shouldn't be * credited to this transport. */ bytes_acked -= migrate_bytes; /* 8.2. When an outstanding TSN is acknowledged, * the endpoint shall clear the error counter of * the destination transport address to which the * DATA chunk was last sent. * The association's overall error counter is * also cleared. */ transport->error_count = 0; transport->asoc->overall_error_count = 0; forward_progress = true; /* * While in SHUTDOWN PENDING, we may have started * the T5 shutdown guard timer after reaching the * retransmission limit. Stop that timer as soon * as the receiver acknowledged any data. */ if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING && timer_delete(&asoc->timers[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD])) sctp_association_put(asoc); /* Mark the destination transport address as * active if it is not so marked. */ if ((transport->state == SCTP_INACTIVE || transport->state == SCTP_UNCONFIRMED) && sctp_cmp_addr_exact(&transport->ipaddr, saddr)) { sctp_assoc_control_transport( transport->asoc, transport, SCTP_TRANSPORT_UP, SCTP_RECEIVED_SACK); } sctp_transport_raise_cwnd(transport, sack_ctsn, bytes_acked); transport->flight_size -= bytes_acked; if (transport->flight_size == 0) transport->partial_bytes_acked = 0; q->outstanding_bytes -= bytes_acked + migrate_bytes; } else { /* RFC 2960 6.1, sctpimpguide-06 2.15.2 * When a sender is doing zero window probing, it * should not timeout the association if it continues * to receive new packets from the receiver. The * reason is that the receiver MAY keep its window * closed for an indefinite time. * A sender is doing zero window probing when the * receiver's advertised window is zero, and there is * only one data chunk in flight to the receiver. * * Allow the association to timeout while in SHUTDOWN * PENDING or SHUTDOWN RECEIVED in case the receiver * stays in zero window mode forever. */ if (!q->asoc->peer.rwnd && !list_empty(&tlist) && (sack_ctsn+2 == q->asoc->next_tsn) && q->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) { pr_debug("%s: sack received for zero window " "probe:%u\n", __func__, sack_ctsn); q->asoc->overall_error_count = 0; transport->error_count = 0; } } /* RFC 2960 6.3.2 Retransmission Timer Rules * * R2) Whenever all outstanding data sent to an address have * been acknowledged, turn off the T3-rtx timer of that * address. 
*/ if (!transport->flight_size) { if (timer_delete(&transport->T3_rtx_timer)) sctp_transport_put(transport); } else if (restart_timer) { if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) sctp_transport_hold(transport); } if (forward_progress) { if (transport->dst) sctp_transport_dst_confirm(transport); } } list_splice(&tlist, transmitted_queue); } /* Mark chunks as missing and consequently may get retransmitted. */ static void sctp_mark_missing(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, __u32 highest_new_tsn_in_sack, int count_of_newacks) { struct sctp_chunk *chunk; __u32 tsn; char do_fast_retransmit = 0; struct sctp_association *asoc = q->asoc; struct sctp_transport *primary = asoc->peer.primary_path; list_for_each_entry(chunk, transmitted_queue, transmitted_list) { tsn = ntohl(chunk->subh.data_hdr->tsn); /* RFC 2960 7.2.4, sctpimpguide-05 2.8.2 M3) Examine all * 'Unacknowledged TSN's', if the TSN number of an * 'Unacknowledged TSN' is smaller than the 'HighestTSNinSack' * value, increment the 'TSN.Missing.Report' count on that * chunk if it has NOT been fast retransmitted or marked for * fast retransmit already. */ if (chunk->fast_retransmit == SCTP_CAN_FRTX && !chunk->tsn_gap_acked && TSN_lt(tsn, highest_new_tsn_in_sack)) { /* SFR-CACC may require us to skip marking * this chunk as missing. */ if (!transport || !sctp_cacc_skip(primary, chunk->transport, count_of_newacks, tsn)) { chunk->tsn_missing_report++; pr_debug("%s: tsn:0x%x missing counter:%d\n", __func__, tsn, chunk->tsn_missing_report); } } /* * M4) If any DATA chunk is found to have a * 'TSN.Missing.Report' * value larger than or equal to 3, mark that chunk for * retransmission and start the fast retransmit procedure. */ if (chunk->tsn_missing_report >= 3) { chunk->fast_retransmit = SCTP_NEED_FRTX; do_fast_retransmit = 1; } } if (transport) { if (do_fast_retransmit) sctp_retransmit(q, transport, SCTP_RTXR_FAST_RTX); pr_debug("%s: transport:%p, cwnd:%d, ssthresh:%d, " "flight_size:%d, pba:%d\n", __func__, transport, transport->cwnd, transport->ssthresh, transport->flight_size, transport->partial_bytes_acked); } } /* Is the given TSN acked by this packet? */ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn) { __u32 ctsn = ntohl(sack->cum_tsn_ack); union sctp_sack_variable *frags; __u16 tsn_offset, blocks; int i; if (TSN_lte(tsn, ctsn)) goto pass; /* 3.3.4 Selective Acknowledgment (SACK) (3): * * Gap Ack Blocks: * These fields contain the Gap Ack Blocks. They are repeated * for each Gap Ack Block up to the number of Gap Ack Blocks * defined in the Number of Gap Ack Blocks field. All DATA * chunks with TSNs greater than or equal to (Cumulative TSN * Ack + Gap Ack Block Start) and less than or equal to * (Cumulative TSN Ack + Gap Ack Block End) of each Gap Ack * Block are assumed to have been received correctly. */ frags = (union sctp_sack_variable *)(sack + 1); blocks = ntohs(sack->num_gap_ack_blocks); tsn_offset = tsn - ctsn; for (i = 0; i < blocks; ++i) { if (tsn_offset >= ntohs(frags[i].gab.start) && tsn_offset <= ntohs(frags[i].gab.end)) goto pass; } return 0; pass: return 1; } static inline int sctp_get_skip_pos(struct sctp_fwdtsn_skip *skiplist, int nskips, __be16 stream) { int i; for (i = 0; i < nskips; i++) { if (skiplist[i].stream == stream) return i; } return i; } /* Create and add a fwdtsn chunk to the outq's control queue if needed. 
*/ void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn) { struct sctp_association *asoc = q->asoc; struct sctp_chunk *ftsn_chunk = NULL; struct sctp_fwdtsn_skip ftsn_skip_arr[10]; int nskips = 0; int skip_pos = 0; __u32 tsn; struct sctp_chunk *chunk; struct list_head *lchunk, *temp; if (!asoc->peer.prsctp_capable) return; /* PR-SCTP C1) Let SackCumAck be the Cumulative TSN ACK carried in the * received SACK. * * If (Advanced.Peer.Ack.Point < SackCumAck), then update * Advanced.Peer.Ack.Point to be equal to SackCumAck. */ if (TSN_lt(asoc->adv_peer_ack_point, ctsn)) asoc->adv_peer_ack_point = ctsn; /* PR-SCTP C2) Try to further advance the "Advanced.Peer.Ack.Point" * locally, that is, to move "Advanced.Peer.Ack.Point" up as long as * the chunk next in the out-queue space is marked as "abandoned" as * shown in the following example: * * Assuming that a SACK arrived with the Cumulative TSN ACK 102 * and the Advanced.Peer.Ack.Point is updated to this value: * * out-queue at the end of ==> out-queue after Adv.Ack.Point * normal SACK processing local advancement * ... ... * Adv.Ack.Pt-> 102 acked 102 acked * 103 abandoned 103 abandoned * 104 abandoned Adv.Ack.P-> 104 abandoned * 105 105 * 106 acked 106 acked * ... ... * * In this example, the data sender successfully advanced the * "Advanced.Peer.Ack.Point" from 102 to 104 locally. */ list_for_each_safe(lchunk, temp, &q->abandoned) { chunk = list_entry(lchunk, struct sctp_chunk, transmitted_list); tsn = ntohl(chunk->subh.data_hdr->tsn); /* Remove any chunks in the abandoned queue that are acked by * the ctsn. */ if (TSN_lte(tsn, ctsn)) { list_del_init(lchunk); sctp_chunk_free(chunk); } else { if (TSN_lte(tsn, asoc->adv_peer_ack_point+1)) { asoc->adv_peer_ack_point = tsn; if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) continue; skip_pos = sctp_get_skip_pos(&ftsn_skip_arr[0], nskips, chunk->subh.data_hdr->stream); ftsn_skip_arr[skip_pos].stream = chunk->subh.data_hdr->stream; ftsn_skip_arr[skip_pos].ssn = chunk->subh.data_hdr->ssn; if (skip_pos == nskips) nskips++; if (nskips == 10) break; } else break; } } /* PR-SCTP C3) If, after step C1 and C2, the "Advanced.Peer.Ack.Point" * is greater than the Cumulative TSN ACK carried in the received * SACK, the data sender MUST send the data receiver a FORWARD TSN * chunk containing the latest value of the * "Advanced.Peer.Ack.Point". * * C4) For each "abandoned" TSN the sender of the FORWARD TSN SHOULD * list each stream and sequence number in the forwarded TSN. This * information will enable the receiver to easily find any * stranded TSN's waiting on stream reorder queues. Each stream * SHOULD only be reported once; this means that if multiple * abandoned messages occur in the same stream then only the * highest abandoned stream sequence number is reported. If the * total size of the FORWARD TSN does NOT fit in a single MTU then * the sender of the FORWARD TSN SHOULD lower the * Advanced.Peer.Ack.Point to the last TSN that will fit in a * single MTU. */ if (asoc->adv_peer_ack_point > ctsn) ftsn_chunk = sctp_make_fwdtsn(asoc, asoc->adv_peer_ack_point, nskips, &ftsn_skip_arr[0]); if (ftsn_chunk) { list_add_tail(&ftsn_chunk->list, &q->control_chunk_list); SCTP_INC_STATS(asoc->base.net, SCTP_MIB_OUTCTRLCHUNKS); } }
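/*
 * Illustrative sketch (not part of the sources above): a minimal,
 * self-contained model of the PR-SCTP "Advanced.Peer.Ack.Point" local
 * advancement described in the C2 comment of sctp_generate_fwdtsn(). The
 * names example_tsn_lte() and example_advance_peer_ack_point() are
 * hypothetical, and the abandoned queue is simplified to a sorted array
 * of TSNs.
 */
#include <stdint.h>
#include <stdio.h>

/* Serial-number "less than or equal" on 32-bit TSNs (mod 2^32 arithmetic). */
static int example_tsn_lte(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) <= 0;
}

/* Advance adv_point over every abandoned TSN that directly follows it. */
static uint32_t example_advance_peer_ack_point(uint32_t adv_point,
					       const uint32_t *abandoned,
					       int n_abandoned)
{
	int i;

	for (i = 0; i < n_abandoned; i++) {
		if (example_tsn_lte(abandoned[i], adv_point + 1))
			adv_point = abandoned[i];
		else
			break;
	}
	return adv_point;
}

int main(void)
{
	/* Mirrors the worked example above: 102 acked, 103 and 104 abandoned. */
	uint32_t abandoned[] = { 103, 104 };

	printf("%u\n", example_advance_peer_ack_point(102, abandoned, 2)); /* 104 */
	return 0;
}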
1643 1644 1645 // SPDX-License-Identifier: GPL-2.0+ /* * drivers/of/property.c - Procedures for accessing and interpreting * Devicetree properties and graphs. * * Initially created by copying procedures from drivers/of/base.c. This * file contains the OF property as well as the OF graph interface * functions. * * Paul Mackerras August 1996. * Copyright (C) 1996-2005 Paul Mackerras. * * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. * {engebret|bergner}@us.ibm.com * * Adapted for sparc and sparc64 by David S. Miller davem@davemloft.net * * Reconsolidated from arch/x/kernel/prom.c by Stephen Rothwell and * Grant Likely. */ #define pr_fmt(fmt) "OF: " fmt #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_device.h> #include <linux/of_graph.h> #include <linux/of_irq.h> #include <linux/string.h> #include <linux/moduleparam.h> #include "of_private.h" /** * of_property_read_bool - Find a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * * Search for a boolean property in a device node. Usage on non-boolean * property types is deprecated. * * Return: true if the property exists false otherwise. */ bool of_property_read_bool(const struct device_node *np, const char *propname) { struct property *prop = of_find_property(np, propname, NULL); /* * Boolean properties should not have a value. Testing for property * presence should either use of_property_present() or just read the * property value and check the returned error code. */ if (prop && prop->length) pr_warn("%pOF: Read of boolean property '%s' with a value.\n", np, propname); return prop ? true : false; } EXPORT_SYMBOL(of_property_read_bool); /** * of_graph_is_present() - check graph's presence * @node: pointer to device_node containing graph port * * Return: True if @node has a port or ports (with a port) sub-node, * false otherwise. */ bool of_graph_is_present(const struct device_node *node) { struct device_node *ports __free(device_node) = of_get_child_by_name(node, "ports"); if (ports) node = ports; struct device_node *port __free(device_node) = of_get_child_by_name(node, "port"); return !!port; } EXPORT_SYMBOL(of_graph_is_present); /** * of_property_count_elems_of_size - Count the number of elements in a property * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @elem_size: size of the individual element * * Search for a property in a device node and count the number of elements of * size elem_size in it. * * Return: The number of elements on sucess, -EINVAL if the property does not * exist or its length does not match a multiple of elem_size and -ENODATA if * the property does not have a value. */ int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size) { const struct property *prop = of_find_property(np, propname, NULL); if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; if (prop->length % elem_size != 0) { pr_err("size of %s in node %pOF is not a multiple of %d\n", propname, np, elem_size); return -EINVAL; } return prop->length / elem_size; } EXPORT_SYMBOL_GPL(of_property_count_elems_of_size); /** * of_find_property_value_of_size * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. 
* @min: minimum allowed length of property value * @max: maximum allowed length of property value (0 means unlimited) * @len: if !=NULL, actual length is written to here * * Search for a property in a device node and valid the requested size. * * Return: The property value on success, -EINVAL if the property does not * exist, -ENODATA if property does not have a value, and -EOVERFLOW if the * property data is too small or too large. * */ static void *of_find_property_value_of_size(const struct device_node *np, const char *propname, u32 min, u32 max, size_t *len) { const struct property *prop = of_find_property(np, propname, NULL); if (!prop) return ERR_PTR(-EINVAL); if (!prop->value) return ERR_PTR(-ENODATA); if (prop->length < min) return ERR_PTR(-EOVERFLOW); if (max && prop->length > max) return ERR_PTR(-EOVERFLOW); if (len) *len = prop->length; return prop->value; } /** * of_property_read_u16_index - Find and read a u16 from a multi-value property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the u16 in the list of values * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 16-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u16 value can be decoded. */ int of_property_read_u16_index(const struct device_node *np, const char *propname, u32 index, u16 *out_value) { const u16 *val = of_find_property_value_of_size(np, propname, ((index + 1) * sizeof(*out_value)), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = be16_to_cpup(((__be16 *)val) + index); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u16_index); /** * of_property_read_u32_index - Find and read a u32 from a multi-value property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the u32 in the list of values * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 32-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u32 value can be decoded. */ int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value) { const u32 *val = of_find_property_value_of_size(np, propname, ((index + 1) * sizeof(*out_value)), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = be32_to_cpup(((__be32 *)val) + index); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u32_index); /** * of_property_read_u64_index - Find and read a u64 from a multi-value property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the u64 in the list of values * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 64-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u64 value can be decoded. 
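 *
 * For example (illustrative), given a dts entry such as
 * ``example-prop = /bits/ 64 <0x100000000 0x200000000>;``
 * a call to of_property_read_u64_index(np, "example-prop", 1, &val)
 * would store 0x200000000 in val.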
*/ int of_property_read_u64_index(const struct device_node *np, const char *propname, u32 index, u64 *out_value) { const u64 *val = of_find_property_value_of_size(np, propname, ((index + 1) * sizeof(*out_value)), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = be64_to_cpup(((__be64 *)val) + index); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u64_index); /** * of_property_read_variable_u8_array - Find and read an array of u8 from a * property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 8-bit value(s) from * it. * * dts entry of array should be like: * ``property = /bits/ 8 <0x50 0x60 0x70>;`` * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u8 value can be decoded. */ int of_property_read_variable_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const u8 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) *out_values++ = *val++; return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u8_array); /** * of_property_read_variable_u16_array - Find and read an array of u16 from a * property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 16-bit value(s) from * it. * * dts entry of array should be like: * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;`` * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u16 value can be decoded. */ int of_property_read_variable_u16_array(const struct device_node *np, const char *propname, u16 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const __be16 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) *out_values++ = be16_to_cpup(val++); return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u16_array); /** * of_property_read_variable_u32_array - Find and read an array of 32 bit * integers from a property, with bounds on the minimum and maximum array size. 
* * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to return found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 32-bit value(s) from * it. * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u32 value can be decoded. */ int of_property_read_variable_u32_array(const struct device_node *np, const char *propname, u32 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const __be32 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) *out_values++ = be32_to_cpup(val++); return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u32_array); /** * of_property_read_u64 - Find and read a 64 bit integer from a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_value: pointer to return value, modified only if return value is 0. * * Search for a property in a device node and read a 64-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u64 value can be decoded. */ int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value) { const __be32 *val = of_find_property_value_of_size(np, propname, sizeof(*out_value), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = of_read_number(val, 2); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u64); /** * of_property_read_variable_u64_array - Find and read an array of 64 bit * integers from a property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 64-bit value(s) from * it. * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u64 value can be decoded. 
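 *
 * An illustrative dts entry for such an array:
 * ``property = /bits/ 64 <0x1000 0x2000 0x3000>;``
 * Reading it with sz_min = 1 and sz_max = 3 returns 3 and stores the
 * three decoded values in out_values.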
*/ int of_property_read_variable_u64_array(const struct device_node *np, const char *propname, u64 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const __be32 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) { *out_values++ = of_read_number(val, 2); val += 2; } return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u64_array); /** * of_property_read_string - Find and read a string from a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_string: pointer to null terminated return string, modified only if * return value is 0. * * Search for a property in a device tree node and retrieve a null * terminated string value (pointer to data, not a copy). * * Return: 0 on success, -EINVAL if the property does not exist, -ENODATA if * property does not have a value, and -EILSEQ if the string is not * null-terminated within the length of the property data. * * Note that the empty string "" has length of 1, thus -ENODATA cannot * be interpreted as an empty string. * * The out_string pointer is modified only if a valid string can be decoded. */ int of_property_read_string(const struct device_node *np, const char *propname, const char **out_string) { const struct property *prop = of_find_property(np, propname, NULL); if (!prop) return -EINVAL; if (!prop->length) return -ENODATA; if (strnlen(prop->value, prop->length) >= prop->length) return -EILSEQ; *out_string = prop->value; return 0; } EXPORT_SYMBOL_GPL(of_property_read_string); /** * of_property_match_string() - Find string in a list and return index * @np: pointer to the node containing the string list property * @propname: string list property name * @string: pointer to the string to search for in the string list * * Search for an exact match of string in a device node property which is a * string of lists. * * Return: the index of the first occurrence of the string on success, -EINVAL * if the property does not exist, -ENODATA if the property does not have a * value, and -EILSEQ if the string is not null-terminated within the length of * the property data. */ int of_property_match_string(const struct device_node *np, const char *propname, const char *string) { const struct property *prop = of_find_property(np, propname, NULL); size_t l; int i; const char *p, *end; if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; p = prop->value; end = p + prop->length; for (i = 0; p < end; i++, p += l) { l = strnlen(p, end - p) + 1; if (p + l > end) return -EILSEQ; pr_debug("comparing %s with %s\n", string, p); if (strcmp(string, p) == 0) return i; /* Found it; return index */ } return -ENODATA; } EXPORT_SYMBOL_GPL(of_property_match_string); /** * of_property_read_string_helper() - Utility helper for parsing string properties * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_strs: output array of string pointers. * @sz: number of array elements to read. * @skip: Number of strings to skip over at beginning of list. * * Don't call this function directly. It is a utility helper for the * of_property_read_string*() family of functions. 
*/ int of_property_read_string_helper(const struct device_node *np, const char *propname, const char **out_strs, size_t sz, int skip) { const struct property *prop = of_find_property(np, propname, NULL); int l = 0, i = 0; const char *p, *end; if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; p = prop->value; end = p + prop->length; for (i = 0; p < end && (!out_strs || i < skip + sz); i++, p += l) { l = strnlen(p, end - p) + 1; if (p + l > end) return -EILSEQ; if (out_strs && i >= skip) *out_strs++ = p; } i -= skip; return i <= 0 ? -ENODATA : i; } EXPORT_SYMBOL_GPL(of_property_read_string_helper); const __be32 *of_prop_next_u32(const struct property *prop, const __be32 *cur, u32 *pu) { const void *curv = cur; if (!prop) return NULL; if (!cur) { curv = prop->value; goto out_val; } curv += sizeof(*cur); if (curv >= prop->value + prop->length) return NULL; out_val: *pu = be32_to_cpup(curv); return curv; } EXPORT_SYMBOL_GPL(of_prop_next_u32); const char *of_prop_next_string(const struct property *prop, const char *cur) { const void *curv = cur; if (!prop) return NULL; if (!cur) return prop->value; curv += strlen(cur) + 1; if (curv >= prop->value + prop->length) return NULL; return curv; } EXPORT_SYMBOL_GPL(of_prop_next_string); /** * of_graph_parse_endpoint() - parse common endpoint node properties * @node: pointer to endpoint device_node * @endpoint: pointer to the OF endpoint data structure * * The caller should hold a reference to @node. */ int of_graph_parse_endpoint(const struct device_node *node, struct of_endpoint *endpoint) { struct device_node *port_node __free(device_node) = of_get_parent(node); WARN_ONCE(!port_node, "%s(): endpoint %pOF has no parent node\n", __func__, node); memset(endpoint, 0, sizeof(*endpoint)); endpoint->local_node = node; /* * It doesn't matter whether the two calls below succeed. * If they don't then the default value 0 is used. */ of_property_read_u32(port_node, "reg", &endpoint->port); of_property_read_u32(node, "reg", &endpoint->id); return 0; } EXPORT_SYMBOL(of_graph_parse_endpoint); /** * of_graph_get_port_by_id() - get the port matching a given id * @parent: pointer to the parent device node * @id: id of the port * * Return: A 'port' node pointer with refcount incremented. The caller * has to use of_node_put() on it when done. */ struct device_node *of_graph_get_port_by_id(struct device_node *parent, u32 id) { struct device_node *node __free(device_node) = of_get_child_by_name(parent, "ports"); if (node) parent = node; for_each_child_of_node_scoped(parent, port) { u32 port_id = 0; if (!of_node_name_eq(port, "port")) continue; of_property_read_u32(port, "reg", &port_id); if (id == port_id) return_ptr(port); } return NULL; } EXPORT_SYMBOL(of_graph_get_port_by_id); /** * of_graph_get_next_port() - get next port node. * @parent: pointer to the parent device node, or parent ports node * @prev: previous port node, or NULL to get first * * Parent device node can be used as @parent whether device node has ports node * or not. It will work same as ports@0 node. * * Return: A 'port' node pointer with refcount incremented. Refcount * of the passed @prev node is decremented. 
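 *
 * A typical (illustrative) way to walk every port of @parent:
 *
 *	port = NULL;
 *	while ((port = of_graph_get_next_port(parent, port)))
 *		...;
 *
 * Stopping the walk early leaves a reference on the current port that the
 * caller must drop with of_node_put(). The for_each_of_graph_port()
 * iterator used elsewhere in this file provides a scoped wrapper around
 * this pattern.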
*/ struct device_node *of_graph_get_next_port(const struct device_node *parent, struct device_node *prev) { if (!parent) return NULL; if (!prev) { struct device_node *node __free(device_node) = of_get_child_by_name(parent, "ports"); if (node) parent = node; return of_get_child_by_name(parent, "port"); } do { prev = of_get_next_child(parent, prev); if (!prev) break; } while (!of_node_name_eq(prev, "port")); return prev; } EXPORT_SYMBOL(of_graph_get_next_port); /** * of_graph_get_next_port_endpoint() - get next endpoint node in port. * If it reached to end of the port, it will return NULL. * @port: pointer to the target port node * @prev: previous endpoint node, or NULL to get first * * Return: An 'endpoint' node pointer with refcount incremented. Refcount * of the passed @prev node is decremented. */ struct device_node *of_graph_get_next_port_endpoint(const struct device_node *port, struct device_node *prev) { while (1) { prev = of_get_next_child(port, prev); if (!prev) break; if (WARN(!of_node_name_eq(prev, "endpoint"), "non endpoint node is used (%pOF)", prev)) continue; break; } return prev; } EXPORT_SYMBOL(of_graph_get_next_port_endpoint); /** * of_graph_get_next_endpoint() - get next endpoint node * @parent: pointer to the parent device node * @prev: previous endpoint node, or NULL to get first * * Return: An 'endpoint' node pointer with refcount incremented. Refcount * of the passed @prev node is decremented. */ struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *prev) { struct device_node *endpoint; struct device_node *port; if (!parent) return NULL; /* * Start by locating the port node. If no previous endpoint is specified * search for the first port node, otherwise get the previous endpoint * parent port node. */ if (!prev) { port = of_graph_get_next_port(parent, NULL); if (!port) { pr_debug("graph: no port node found in %pOF\n", parent); return NULL; } } else { port = of_get_parent(prev); if (WARN_ONCE(!port, "%s(): endpoint %pOF has no parent node\n", __func__, prev)) return NULL; } while (1) { /* * Now that we have a port node, get the next endpoint by * getting the next child. If the previous endpoint is NULL this * will return the first child. */ endpoint = of_graph_get_next_port_endpoint(port, prev); if (endpoint) { of_node_put(port); return endpoint; } /* No more endpoints under this port, try the next one. */ prev = NULL; port = of_graph_get_next_port(parent, port); if (!port) return NULL; } } EXPORT_SYMBOL(of_graph_get_next_endpoint); /** * of_graph_get_endpoint_by_regs() - get endpoint node of specific identifiers * @parent: pointer to the parent device node * @port_reg: identifier (value of reg property) of the parent port node * @reg: identifier (value of reg property) of the endpoint node * * Return: An 'endpoint' node pointer which is identified by reg and at the same * is the child of a port node identified by port_reg. reg and port_reg are * ignored when they are -1. Use of_node_put() on the pointer when done. 
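 *
 * For example (illustrative), to look up the endpoint with reg = 0 under
 * the port with reg = 1 of a device node @dev_np:
 *
 *	ep = of_graph_get_endpoint_by_regs(dev_np, 1, 0);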
*/ struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg) { struct of_endpoint endpoint; struct device_node *node = NULL; for_each_endpoint_of_node(parent, node) { of_graph_parse_endpoint(node, &endpoint); if (((port_reg == -1) || (endpoint.port == port_reg)) && ((reg == -1) || (endpoint.id == reg))) return node; } return NULL; } EXPORT_SYMBOL(of_graph_get_endpoint_by_regs); /** * of_graph_get_remote_endpoint() - get remote endpoint node * @node: pointer to a local endpoint device_node * * Return: Remote endpoint node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_endpoint(const struct device_node *node) { /* Get remote endpoint node. */ return of_parse_phandle(node, "remote-endpoint", 0); } EXPORT_SYMBOL(of_graph_get_remote_endpoint); /** * of_graph_get_port_parent() - get port's parent node * @node: pointer to a local endpoint device_node * * Return: device node associated with endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_port_parent(struct device_node *node) { unsigned int depth; if (!node) return NULL; /* * Preserve usecount for passed in node as of_get_next_parent() * will do of_node_put() on it. */ of_node_get(node); /* Walk 3 levels up only if there is 'ports' node. */ for (depth = 3; depth && node; depth--) { node = of_get_next_parent(node); if (depth == 2 && !of_node_name_eq(node, "ports") && !of_node_name_eq(node, "in-ports") && !of_node_name_eq(node, "out-ports")) break; } return node; } EXPORT_SYMBOL(of_graph_get_port_parent); /** * of_graph_get_remote_port_parent() - get remote port's parent node * @node: pointer to a local endpoint device_node * * Return: Remote device node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_port_parent( const struct device_node *node) { /* Get remote endpoint node. */ struct device_node *np __free(device_node) = of_graph_get_remote_endpoint(node); return of_graph_get_port_parent(np); } EXPORT_SYMBOL(of_graph_get_remote_port_parent); /** * of_graph_get_remote_port() - get remote port node * @node: pointer to a local endpoint device_node * * Return: Remote port node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_port(const struct device_node *node) { struct device_node *np; /* Get remote endpoint node. 
*/ np = of_graph_get_remote_endpoint(node); if (!np) return NULL; return of_get_next_parent(np); } EXPORT_SYMBOL(of_graph_get_remote_port); /** * of_graph_get_endpoint_count() - get the number of endpoints in a device node * @np: parent device node containing ports and endpoints * * Return: count of endpoint of this device node */ unsigned int of_graph_get_endpoint_count(const struct device_node *np) { struct device_node *endpoint; unsigned int num = 0; for_each_endpoint_of_node(np, endpoint) num++; return num; } EXPORT_SYMBOL(of_graph_get_endpoint_count); /** * of_graph_get_port_count() - get the number of port in a device or ports node * @np: pointer to the device or ports node * * Return: count of port of this device or ports node */ unsigned int of_graph_get_port_count(struct device_node *np) { unsigned int num = 0; for_each_of_graph_port(np, port) num++; return num; } EXPORT_SYMBOL(of_graph_get_port_count); /** * of_graph_get_remote_node() - get remote parent device_node for given port/endpoint * @node: pointer to parent device_node containing graph port/endpoint * @port: identifier (value of reg property) of the parent port node * @endpoint: identifier (value of reg property) of the endpoint node * * Return: Remote device node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_node(const struct device_node *node, u32 port, u32 endpoint) { struct device_node *endpoint_node, *remote; endpoint_node = of_graph_get_endpoint_by_regs(node, port, endpoint); if (!endpoint_node) { pr_debug("no valid endpoint (%d, %d) for node %pOF\n", port, endpoint, node); return NULL; } remote = of_graph_get_remote_port_parent(endpoint_node); of_node_put(endpoint_node); if (!remote) { pr_debug("no valid remote node\n"); return NULL; } if (!of_device_is_available(remote)) { pr_debug("not available for remote node\n"); of_node_put(remote); return NULL; } return remote; } EXPORT_SYMBOL(of_graph_get_remote_node); static struct fwnode_handle *of_fwnode_get(struct fwnode_handle *fwnode) { return of_fwnode_handle(of_node_get(to_of_node(fwnode))); } static void of_fwnode_put(struct fwnode_handle *fwnode) { of_node_put(to_of_node(fwnode)); } static bool of_fwnode_device_is_available(const struct fwnode_handle *fwnode) { return of_device_is_available(to_of_node(fwnode)); } static bool of_fwnode_device_dma_supported(const struct fwnode_handle *fwnode) { return true; } static enum dev_dma_attr of_fwnode_device_get_dma_attr(const struct fwnode_handle *fwnode) { if (of_dma_is_coherent(to_of_node(fwnode))) return DEV_DMA_COHERENT; else return DEV_DMA_NON_COHERENT; } static bool of_fwnode_property_present(const struct fwnode_handle *fwnode, const char *propname) { return of_property_present(to_of_node(fwnode), propname); } static bool of_fwnode_property_read_bool(const struct fwnode_handle *fwnode, const char *propname) { return of_property_read_bool(to_of_node(fwnode), propname); } static int of_fwnode_property_read_int_array(const struct fwnode_handle *fwnode, const char *propname, unsigned int elem_size, void *val, size_t nval) { const struct device_node *node = to_of_node(fwnode); if (!val) return of_property_count_elems_of_size(node, propname, elem_size); switch (elem_size) { case sizeof(u8): return of_property_read_u8_array(node, propname, val, nval); case sizeof(u16): return of_property_read_u16_array(node, propname, val, nval); case sizeof(u32): return of_property_read_u32_array(node, propname, val, nval); case sizeof(u64): return 
of_property_read_u64_array(node, propname, val, nval); } return -ENXIO; } static int of_fwnode_property_read_string_array(const struct fwnode_handle *fwnode, const char *propname, const char **val, size_t nval) { const struct device_node *node = to_of_node(fwnode); return val ? of_property_read_string_array(node, propname, val, nval) : of_property_count_strings(node, propname); } static const char *of_fwnode_get_name(const struct fwnode_handle *fwnode) { return kbasename(to_of_node(fwnode)->full_name); } static const char *of_fwnode_get_name_prefix(const struct fwnode_handle *fwnode) { /* Root needs no prefix here (its name is "/"). */ if (!to_of_node(fwnode)->parent) return ""; return "/"; } static struct fwnode_handle * of_fwnode_get_parent(const struct fwnode_handle *fwnode) { return of_fwnode_handle(of_get_parent(to_of_node(fwnode))); } static struct fwnode_handle * of_fwnode_get_next_child_node(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { return of_fwnode_handle(of_get_next_available_child(to_of_node(fwnode), to_of_node(child))); } static struct fwnode_handle * of_fwnode_get_named_child_node(const struct fwnode_handle *fwnode, const char *childname) { const struct device_node *node = to_of_node(fwnode); struct device_node *child; for_each_available_child_of_node(node, child) if (of_node_name_eq(child, childname)) return of_fwnode_handle(child); return NULL; } static int of_fwnode_get_reference_args(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, unsigned int nargs, unsigned int index, struct fwnode_reference_args *args) { struct of_phandle_args of_args; unsigned int i; int ret; if (nargs_prop) ret = of_parse_phandle_with_args(to_of_node(fwnode), prop, nargs_prop, index, &of_args); else ret = of_parse_phandle_with_fixed_args(to_of_node(fwnode), prop, nargs, index, &of_args); if (ret < 0) return ret; if (!args) { of_node_put(of_args.np); return 0; } args->nargs = of_args.args_count; args->fwnode = of_fwnode_handle(of_args.np); for (i = 0; i < NR_FWNODE_REFERENCE_ARGS; i++) args->args[i] = i < of_args.args_count ? of_args.args[i] : 0; return 0; } static struct fwnode_handle * of_fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { return of_fwnode_handle(of_graph_get_next_endpoint(to_of_node(fwnode), to_of_node(prev))); } static struct fwnode_handle * of_fwnode_graph_get_remote_endpoint(const struct fwnode_handle *fwnode) { return of_fwnode_handle( of_graph_get_remote_endpoint(to_of_node(fwnode))); } static struct fwnode_handle * of_fwnode_graph_get_port_parent(struct fwnode_handle *fwnode) { struct device_node *np; /* Get the parent of the port */ np = of_get_parent(to_of_node(fwnode)); if (!np) return NULL; /* Is this the "ports" node? If not, it's the port parent. 
*/ if (!of_node_name_eq(np, "ports")) return of_fwnode_handle(np); return of_fwnode_handle(of_get_next_parent(np)); } static int of_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint) { const struct device_node *node = to_of_node(fwnode); struct device_node *port_node __free(device_node) = of_get_parent(node); endpoint->local_fwnode = fwnode; of_property_read_u32(port_node, "reg", &endpoint->port); of_property_read_u32(node, "reg", &endpoint->id); return 0; } static const void * of_fwnode_device_get_match_data(const struct fwnode_handle *fwnode, const struct device *dev) { return of_device_get_match_data(dev); } static void of_link_to_phandle(struct device_node *con_np, struct device_node *sup_np, u8 flags) { struct device_node *tmp_np __free(device_node) = of_node_get(sup_np); /* Check that sup_np and its ancestors are available. */ while (tmp_np) { if (of_fwnode_handle(tmp_np)->dev) break; if (!of_device_is_available(tmp_np)) return; tmp_np = of_get_next_parent(tmp_np); } fwnode_link_add(of_fwnode_handle(con_np), of_fwnode_handle(sup_np), flags); } /** * parse_prop_cells - Property parsing function for suppliers * * @np: Pointer to device tree node containing a list * @prop_name: Name of property to be parsed. Expected to hold phandle values * @index: For properties holding a list of phandles, this is the index * into the list. * @list_name: Property name that is known to contain list of phandle(s) to * supplier(s) * @cells_name: property name that specifies phandles' arguments count * * This is a helper function to parse properties that have a known fixed name * and are a list of phandles and phandle arguments. * * Returns: * - phandle node pointer with refcount incremented. Caller must of_node_put() * on it when done. * - NULL if no phandle found at index */ static struct device_node *parse_prop_cells(struct device_node *np, const char *prop_name, int index, const char *list_name, const char *cells_name) { struct of_phandle_args sup_args; if (strcmp(prop_name, list_name)) return NULL; if (__of_parse_phandle_with_args(np, list_name, cells_name, 0, index, &sup_args)) return NULL; return sup_args.np; } #define DEFINE_SIMPLE_PROP(fname, name, cells) \ static struct device_node *parse_##fname(struct device_node *np, \ const char *prop_name, int index) \ { \ return parse_prop_cells(np, prop_name, index, name, cells); \ } static int strcmp_suffix(const char *str, const char *suffix) { unsigned int len, suffix_len; len = strlen(str); suffix_len = strlen(suffix); if (len <= suffix_len) return -1; return strcmp(str + len - suffix_len, suffix); } /** * parse_suffix_prop_cells - Suffix property parsing function for suppliers * * @np: Pointer to device tree node containing a list * @prop_name: Name of property to be parsed. Expected to hold phandle values * @index: For properties holding a list of phandles, this is the index * into the list. * @suffix: Property suffix that is known to contain list of phandle(s) to * supplier(s) * @cells_name: property name that specifies phandles' arguments count * * This is a helper function to parse properties that have a known fixed suffix * and are a list of phandles and phandle arguments. * * Returns: * - phandle node pointer with refcount incremented. Caller must of_node_put() * on it when done. 
* - NULL if no phandle found at index */ static struct device_node *parse_suffix_prop_cells(struct device_node *np, const char *prop_name, int index, const char *suffix, const char *cells_name) { struct of_phandle_args sup_args; if (strcmp_suffix(prop_name, suffix)) return NULL; if (of_parse_phandle_with_args(np, prop_name, cells_name, index, &sup_args)) return NULL; return sup_args.np; } #define DEFINE_SUFFIX_PROP(fname, suffix, cells) \ static struct device_node *parse_##fname(struct device_node *np, \ const char *prop_name, int index) \ { \ return parse_suffix_prop_cells(np, prop_name, index, suffix, cells); \ } /** * struct supplier_bindings - Property parsing functions for suppliers * * @parse_prop: function name * parse_prop() finds the node corresponding to a supplier phandle * parse_prop.np: Pointer to device node holding supplier phandle property * parse_prop.prop_name: Name of property holding a phandle value * parse_prop.index: For properties holding a list of phandles, this is the * index into the list * @get_con_dev: If the consumer node containing the property is never converted * to a struct device, implement this ops so fw_devlink can use it * to find the true consumer. * @optional: Describes whether a supplier is mandatory or not * @fwlink_flags: Optional fwnode link flags to use when creating a fwnode link * for this property. * * Returns: * parse_prop() return values are * - phandle node pointer with refcount incremented. Caller must of_node_put() * on it when done. * - NULL if no phandle found at index */ struct supplier_bindings { struct device_node *(*parse_prop)(struct device_node *np, const char *prop_name, int index); struct device_node *(*get_con_dev)(struct device_node *np); bool optional; u8 fwlink_flags; }; DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells") DEFINE_SIMPLE_PROP(interconnects, "interconnects", "#interconnect-cells") DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells") DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells") DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells") DEFINE_SIMPLE_PROP(io_backends, "io-backends", "#io-backend-cells") DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells") DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells") DEFINE_SIMPLE_PROP(hwlocks, "hwlocks", "#hwlock-cells") DEFINE_SIMPLE_PROP(extcon, "extcon", NULL) DEFINE_SIMPLE_PROP(nvmem_cells, "nvmem-cells", "#nvmem-cell-cells") DEFINE_SIMPLE_PROP(phys, "phys", "#phy-cells") DEFINE_SIMPLE_PROP(wakeup_parent, "wakeup-parent", NULL) DEFINE_SIMPLE_PROP(pinctrl0, "pinctrl-0", NULL) DEFINE_SIMPLE_PROP(pinctrl1, "pinctrl-1", NULL) DEFINE_SIMPLE_PROP(pinctrl2, "pinctrl-2", NULL) DEFINE_SIMPLE_PROP(pinctrl3, "pinctrl-3", NULL) DEFINE_SIMPLE_PROP(pinctrl4, "pinctrl-4", NULL) DEFINE_SIMPLE_PROP(pinctrl5, "pinctrl-5", NULL) DEFINE_SIMPLE_PROP(pinctrl6, "pinctrl-6", NULL) DEFINE_SIMPLE_PROP(pinctrl7, "pinctrl-7", NULL) DEFINE_SIMPLE_PROP(pinctrl8, "pinctrl-8", NULL) DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells") DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) DEFINE_SIMPLE_PROP(backlight, "backlight", NULL) DEFINE_SIMPLE_PROP(panel, "panel", NULL) DEFINE_SIMPLE_PROP(msi_parent, "msi-parent", "#msi-cells") DEFINE_SIMPLE_PROP(post_init_providers, "post-init-providers", NULL) DEFINE_SIMPLE_PROP(access_controllers, "access-controllers", "#access-controller-cells") DEFINE_SIMPLE_PROP(pses, "pses", "#pse-cells") DEFINE_SIMPLE_PROP(power_supplies, "power-supplies", NULL) DEFINE_SUFFIX_PROP(regulators, "-supply", 
NULL) DEFINE_SUFFIX_PROP(gpio, "-gpio", "#gpio-cells") static struct device_node *parse_gpios(struct device_node *np, const char *prop_name, int index) { if (!strcmp_suffix(prop_name, ",nr-gpios")) return NULL; return parse_suffix_prop_cells(np, prop_name, index, "-gpios", "#gpio-cells"); } static struct device_node *parse_iommu_maps(struct device_node *np, const char *prop_name, int index) { if (strcmp(prop_name, "iommu-map")) return NULL; return of_parse_phandle(np, prop_name, (index * 4) + 1); } static struct device_node *parse_gpio_compat(struct device_node *np, const char *prop_name, int index) { struct of_phandle_args sup_args; if (strcmp(prop_name, "gpio") && strcmp(prop_name, "gpios")) return NULL; /* * Ignore node with gpio-hog property since its gpios are all provided * by its parent. */ if (of_property_read_bool(np, "gpio-hog")) return NULL; if (of_parse_phandle_with_args(np, prop_name, "#gpio-cells", index, &sup_args)) return NULL; return sup_args.np; } static struct device_node *parse_interrupts(struct device_node *np, const char *prop_name, int index) { struct of_phandle_args sup_args; if (!IS_ENABLED(CONFIG_OF_IRQ) || IS_ENABLED(CONFIG_PPC)) return NULL; if (strcmp(prop_name, "interrupts") && strcmp(prop_name, "interrupts-extended")) return NULL; return of_irq_parse_one(np, index, &sup_args) ? NULL : sup_args.np; } static struct device_node *parse_interrupt_map(struct device_node *np, const char *prop_name, int index) { const __be32 *imap, *imap_end; struct of_phandle_args sup_args; u32 addrcells, intcells; int imaplen; if (!IS_ENABLED(CONFIG_OF_IRQ)) return NULL; if (strcmp(prop_name, "interrupt-map")) return NULL; if (of_property_read_u32(np, "#interrupt-cells", &intcells)) return NULL; addrcells = of_bus_n_addr_cells(np); imap = of_get_property(np, "interrupt-map", &imaplen); if (!imap) return NULL; imaplen /= sizeof(*imap); imap_end = imap + imaplen; for (int i = 0; imap + addrcells + intcells + 1 < imap_end; i++) { imap += addrcells + intcells; imap = of_irq_parse_imap_parent(imap, imap_end - imap, &sup_args); if (!imap) return NULL; if (i == index) return sup_args.np; of_node_put(sup_args.np); } return NULL; } static struct device_node *parse_remote_endpoint(struct device_node *np, const char *prop_name, int index) { /* Return NULL for index > 0 to signify end of remote-endpoints. 
*/ if (index > 0 || strcmp(prop_name, "remote-endpoint")) return NULL; return of_graph_get_remote_port_parent(np); } static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_clocks, }, { .parse_prop = parse_interconnects, }, { .parse_prop = parse_iommus, .optional = true, }, { .parse_prop = parse_iommu_maps, .optional = true, }, { .parse_prop = parse_mboxes, }, { .parse_prop = parse_io_channels, }, { .parse_prop = parse_io_backends, }, { .parse_prop = parse_dmas, .optional = true, }, { .parse_prop = parse_power_domains, }, { .parse_prop = parse_hwlocks, }, { .parse_prop = parse_extcon, }, { .parse_prop = parse_nvmem_cells, }, { .parse_prop = parse_phys, }, { .parse_prop = parse_wakeup_parent, }, { .parse_prop = parse_pinctrl0, }, { .parse_prop = parse_pinctrl1, }, { .parse_prop = parse_pinctrl2, }, { .parse_prop = parse_pinctrl3, }, { .parse_prop = parse_pinctrl4, }, { .parse_prop = parse_pinctrl5, }, { .parse_prop = parse_pinctrl6, }, { .parse_prop = parse_pinctrl7, }, { .parse_prop = parse_pinctrl8, }, { .parse_prop = parse_remote_endpoint, .get_con_dev = of_graph_get_port_parent, }, { .parse_prop = parse_pwms, }, { .parse_prop = parse_resets, }, { .parse_prop = parse_leds, }, { .parse_prop = parse_backlight, }, { .parse_prop = parse_panel, }, { .parse_prop = parse_msi_parent, }, { .parse_prop = parse_pses, }, { .parse_prop = parse_power_supplies, }, { .parse_prop = parse_gpio_compat, }, { .parse_prop = parse_interrupts, }, { .parse_prop = parse_interrupt_map, }, { .parse_prop = parse_access_controllers, }, { .parse_prop = parse_regulators, }, { .parse_prop = parse_gpio, }, { .parse_prop = parse_gpios, }, { .parse_prop = parse_post_init_providers, .fwlink_flags = FWLINK_FLAG_IGNORE, }, {} }; /** * of_link_property - Create device links to suppliers listed in a property * @con_np: The consumer device tree node which contains the property * @prop_name: Name of property to be parsed * * This function checks if the property @prop_name that is present in the * @con_np device tree node is one of the known common device tree bindings * that list phandles to suppliers. If @prop_name isn't one, this function * doesn't do anything. * * If @prop_name is one, this function attempts to create fwnode links from the * consumer device tree node @con_np to all the suppliers device tree nodes * listed in @prop_name. * * Any failed attempt to create a fwnode link will NOT result in an immediate * return. of_link_property() must create links to all the available supplier * device tree nodes even when attempts to create a link to one or more * suppliers fail. */ static int of_link_property(struct device_node *con_np, const char *prop_name) { struct device_node *phandle; const struct supplier_bindings *s = of_supplier_bindings; unsigned int i = 0; bool matched = false; /* Do not stop at first failed link, link all available suppliers. */ while (!matched && s->parse_prop) { if (s->optional && !fw_devlink_is_strict()) { s++; continue; } while ((phandle = s->parse_prop(con_np, prop_name, i))) { struct device_node *con_dev_np __free(device_node) = s->get_con_dev ? 
s->get_con_dev(con_np) : of_node_get(con_np); matched = true; i++; of_link_to_phandle(con_dev_np, phandle, s->fwlink_flags); of_node_put(phandle); } s++; } return 0; } static void __iomem *of_fwnode_iomap(struct fwnode_handle *fwnode, int index) { #ifdef CONFIG_OF_ADDRESS return of_iomap(to_of_node(fwnode), index); #else return NULL; #endif } static int of_fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index) { return of_irq_get(to_of_node(fwnode), index); } static int of_fwnode_add_links(struct fwnode_handle *fwnode) { const struct property *p; struct device_node *con_np = to_of_node(fwnode); if (IS_ENABLED(CONFIG_X86)) return 0; if (!con_np) return -EINVAL; for_each_property_of_node(con_np, p) of_link_property(con_np, p->name); return 0; } const struct fwnode_operations of_fwnode_ops = { .get = of_fwnode_get, .put = of_fwnode_put, .device_is_available = of_fwnode_device_is_available, .device_get_match_data = of_fwnode_device_get_match_data, .device_dma_supported = of_fwnode_device_dma_supported, .device_get_dma_attr = of_fwnode_device_get_dma_attr, .property_present = of_fwnode_property_present, .property_read_bool = of_fwnode_property_read_bool, .property_read_int_array = of_fwnode_property_read_int_array, .property_read_string_array = of_fwnode_property_read_string_array, .get_name = of_fwnode_get_name, .get_name_prefix = of_fwnode_get_name_prefix, .get_parent = of_fwnode_get_parent, .get_next_child_node = of_fwnode_get_next_child_node, .get_named_child_node = of_fwnode_get_named_child_node, .get_reference_args = of_fwnode_get_reference_args, .graph_get_next_endpoint = of_fwnode_graph_get_next_endpoint, .graph_get_remote_endpoint = of_fwnode_graph_get_remote_endpoint, .graph_get_port_parent = of_fwnode_graph_get_port_parent, .graph_parse_endpoint = of_fwnode_graph_parse_endpoint, .iomap = of_fwnode_iomap, .irq_get = of_fwnode_irq_get, .add_links = of_fwnode_add_links, }; EXPORT_SYMBOL_GPL(of_fwnode_ops);
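As a consumer-side illustration of the graph helpers above, the following is a minimal sketch (not part of the file); the port/endpoint numbers and the function name are hypothetical, and error handling is trimmed to the essentials.

#include <linux/errno.h>
#include <linux/of.h>
#include <linux/of_graph.h>

/* Hypothetical example: find the device connected to port 0 / endpoint 0. */
static int example_find_remote(struct device_node *np)
{
	struct device_node *remote;

	/* Looks up port@0/endpoint@0 under @np and returns its remote parent. */
	remote = of_graph_get_remote_node(np, 0, 0);
	if (!remote)
		return -ENODEV;

	/* ... bind to or record 'remote' here ... */

	of_node_put(remote);	/* of_graph_get_remote_node() returned a reference */
	return 0;
}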
// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ #include "main.h" #include <linux/errno.h> #include <linux/list.h> #include <linux/moduleparam.h> #include <linux/netlink.h> #include <linux/printk.h> #include <linux/skbuff.h> #include <linux/stddef.h> #include <linux/string.h> #include <net/genetlink.h> #include <net/netlink.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "netlink.h" char batadv_routing_algo[20] = "BATMAN_IV"; static struct hlist_head batadv_algo_list; /** * batadv_algo_init() - Initialize batman-adv algorithm management data * structures */ void batadv_algo_init(void) { INIT_HLIST_HEAD(&batadv_algo_list); } /** * batadv_algo_get() - Search for algorithm with specific name * @name: algorithm name to find * * Return: Pointer to batadv_algo_ops on success, NULL otherwise */ struct batadv_algo_ops *batadv_algo_get(const char *name) { struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp; hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) { if (strcmp(bat_algo_ops_tmp->name, name) != 0) continue; bat_algo_ops = bat_algo_ops_tmp; break; } return bat_algo_ops; } /** * batadv_algo_register() - Register callbacks for a mesh algorithm * @bat_algo_ops: mesh algorithm callbacks to add * * Return: 0 on success or negative error number in case of failure */ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops) { struct batadv_algo_ops *bat_algo_ops_tmp; bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name); if (bat_algo_ops_tmp) { pr_info("Trying to register already registered routing algorithm: %s\n", bat_algo_ops->name); return -EEXIST; } /* all algorithms must implement all ops (for now) */ if (!bat_algo_ops->iface.enable || !bat_algo_ops->iface.disable || !bat_algo_ops->iface.update_mac || !bat_algo_ops->iface.primary_set || !bat_algo_ops->neigh.cmp || !bat_algo_ops->neigh.is_similar_or_better) { pr_info("Routing algo '%s' does not implement required ops\n", bat_algo_ops->name); return -EINVAL; } INIT_HLIST_NODE(&bat_algo_ops->list); hlist_add_head(&bat_algo_ops->list, &batadv_algo_list); return 0; } /** * batadv_algo_select() - Select algorithm of mesh interface * @bat_priv: the bat priv with all the mesh interface information * @name: name of the algorithm to select * * The algorithm callbacks for the mesh interface will be set when the algorithm * with the correct name was found. Any previous selected algorithm will not be * deinitialized and the new selected algorithm will also not be initialized. * It is therefore not allowed to call batadv_algo_select outside the creation * function of the mesh interface.
* * Return: 0 on success or negative error number in case of failure */ int batadv_algo_select(struct batadv_priv *bat_priv, const char *name) { struct batadv_algo_ops *bat_algo_ops; bat_algo_ops = batadv_algo_get(name); if (!bat_algo_ops) return -EINVAL; bat_priv->algo_ops = bat_algo_ops; return 0; } static int batadv_param_set_ra(const char *val, const struct kernel_param *kp) { struct batadv_algo_ops *bat_algo_ops; char *algo_name = (char *)val; size_t name_len = strlen(algo_name); if (name_len > 0 && algo_name[name_len - 1] == '\n') algo_name[name_len - 1] = '\0'; bat_algo_ops = batadv_algo_get(algo_name); if (!bat_algo_ops) { pr_err("Routing algorithm '%s' is not supported\n", algo_name); return -EINVAL; } return param_set_copystring(algo_name, kp); } static const struct kernel_param_ops batadv_param_ops_ra = { .set = batadv_param_set_ra, .get = param_get_string, }; static struct kparam_string batadv_param_string_ra = { .maxlen = sizeof(batadv_routing_algo), .string = batadv_routing_algo, }; module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra, 0644); /** * batadv_algo_dump_entry() - fill in information about one supported routing * algorithm * @msg: netlink message to be sent back * @portid: Port to reply to * @seq: Sequence number of message * @bat_algo_ops: Algorithm to be dumped * * Return: Error number, or 0 on success */ static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_algo_ops *bat_algo_ops) { void *hdr; hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, BATADV_CMD_GET_ROUTING_ALGOS); if (!hdr) return -EMSGSIZE; if (nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_algo_ops->name)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** * batadv_algo_dump() - fill in information about supported routing * algorithms * @msg: netlink message to be sent back * @cb: Parameters to the netlink request * * Return: Length of reply message. */ int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb) { int portid = NETLINK_CB(cb->skb).portid; struct batadv_algo_ops *bat_algo_ops; int skip = cb->args[0]; int i = 0; hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { if (i++ < skip) continue; if (batadv_algo_dump_entry(msg, portid, cb->nlh->nlmsg_seq, bat_algo_ops)) { i--; break; } } cb->args[0] = i; return msg->len; }
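To make the registration contract above concrete, here is a hedged sketch of how a routing-algorithm module might register itself; the "MY_ALGO" name and the my_* callbacks are hypothetical and assumed to be defined elsewhere with the prototypes required by struct batadv_algo_ops.

#include "bat_algo.h"

static struct batadv_algo_ops my_algo_ops = {
	.name	= "MY_ALGO",
	.iface	= {
		.enable		= my_iface_enable,
		.disable	= my_iface_disable,
		.update_mac	= my_iface_update_mac,
		.primary_set	= my_iface_primary_set,
	},
	.neigh	= {
		.cmp			= my_neigh_cmp,
		.is_similar_or_better	= my_neigh_is_similar_or_better,
	},
};

static int __init my_algo_init(void)
{
	/*
	 * batadv_algo_register() returns -EEXIST if the name is already taken
	 * and -EINVAL if any of the mandatory iface/neigh callbacks is NULL.
	 */
	return batadv_algo_register(&my_algo_ops);
}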
/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_INSN_H #define _ASM_X86_INSN_H /* * x86 instruction analysis * * Copyright (C) IBM Corporation, 2009 */ #include <asm/byteorder.h> /* insn_attr_t is defined in inat.h */ #include <asm/inat.h> /* __ignore_sync_check__ */ #if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) struct insn_field { union { insn_value_t value; insn_byte_t bytes[4]; }; /* !0 if we've run insn_get_xxx() for this field */ unsigned char got; unsigned char nbytes; }; static inline void insn_field_set(struct insn_field *p, insn_value_t v, unsigned char n) { p->value = v; p->nbytes = n; } static inline void insn_set_byte(struct insn_field *p, unsigned char n, insn_byte_t v) { p->bytes[n] = v; } #else struct insn_field { insn_value_t value; union { insn_value_t little; insn_byte_t bytes[4]; }; /* !0 if we've run insn_get_xxx() for this field */ unsigned char got; unsigned char nbytes; }; static inline void insn_field_set(struct insn_field *p, insn_value_t v, unsigned char n) { p->value = v; p->little = __cpu_to_le32(v); p->nbytes = n; } static inline void insn_set_byte(struct insn_field *p, unsigned char n, insn_byte_t v) { p->bytes[n] = v; p->value = __le32_to_cpu(p->little); } #endif struct insn { struct insn_field prefixes; /* * Prefixes * prefixes.bytes[3]: last prefix */ struct insn_field rex_prefix; /* REX prefix */ struct insn_field vex_prefix; /* VEX prefix */ struct insn_field opcode; /* * opcode.bytes[0]: opcode1 * opcode.bytes[1]: opcode2 * opcode.bytes[2]: opcode3 */ struct insn_field modrm; struct insn_field sib; struct insn_field displacement; union { struct insn_field immediate; struct insn_field moffset1; /* for 64bit MOV */ struct insn_field immediate1; /* for 64bit imm or off16/32 */ }; union { struct insn_field moffset2; /* for 64bit MOV */ struct insn_field immediate2; /* for 64bit imm or seg16 */ }; int emulate_prefix_size; insn_attr_t attr; unsigned char opnd_bytes; unsigned char addr_bytes; unsigned char length; unsigned char x86_64; const insn_byte_t *kaddr; /* kernel address of insn to analyze */ const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ const insn_byte_t *next_byte; }; #define MAX_INSN_SIZE 15 #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) #define X86_MODRM_RM(modrm) ((modrm) & 0x07) #define X86_SIB_SCALE(sib) (((sib) &
0xc0) >> 6) #define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) #define X86_SIB_BASE(sib) ((sib) & 0x07) #define X86_REX2_M(rex) ((rex) & 0x80) /* REX2 M0 */ #define X86_REX2_R(rex) ((rex) & 0x40) /* REX2 R4 */ #define X86_REX2_X(rex) ((rex) & 0x20) /* REX2 X4 */ #define X86_REX2_B(rex) ((rex) & 0x10) /* REX2 B4 */ #define X86_REX_W(rex) ((rex) & 8) /* REX or REX2 W */ #define X86_REX_R(rex) ((rex) & 4) /* REX or REX2 R3 */ #define X86_REX_X(rex) ((rex) & 2) /* REX or REX2 X3 */ #define X86_REX_B(rex) ((rex) & 1) /* REX or REX2 B3 */ /* VEX bit flags */ #define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ #define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ #define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ #define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); extern int insn_get_prefixes(struct insn *insn); extern int insn_get_opcode(struct insn *insn); extern int insn_get_modrm(struct insn *insn); extern int insn_get_sib(struct insn *insn); extern int insn_get_displacement(struct insn *insn); extern int insn_get_immediate(struct insn *insn); extern int insn_get_length(struct insn *insn); enum insn_mode { INSN_MODE_32, INSN_MODE_64, /* Mode is determined by the current kernel build. */ INSN_MODE_KERN, INSN_NUM_MODES, }; extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); #define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN) /* Attribute will be determined after getting ModRM (for opcode groups) */ static inline void insn_get_attribute(struct insn *insn) { insn_get_modrm(insn); } /* Instruction uses RIP-relative addressing */ extern int insn_rip_relative(struct insn *insn); static inline int insn_is_rex2(struct insn *insn) { if (!insn->prefixes.got) insn_get_prefixes(insn); return insn->rex_prefix.nbytes == 2; } static inline insn_byte_t insn_rex2_m_bit(struct insn *insn) { return X86_REX2_M(insn->rex_prefix.bytes[1]); } static inline int insn_is_avx(struct insn *insn) { if (!insn->prefixes.got) insn_get_prefixes(insn); return (insn->vex_prefix.value != 0); } static inline int insn_is_evex(struct insn *insn) { if (!insn->prefixes.got) insn_get_prefixes(insn); return (insn->vex_prefix.nbytes == 4); } static inline int insn_has_emulate_prefix(struct insn *insn) { return !!insn->emulate_prefix_size; } static inline insn_byte_t insn_vex_m_bits(struct insn *insn) { if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ return X86_VEX2_M; else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */ return X86_VEX3_M(insn->vex_prefix.bytes[1]); else /* EVEX */ return X86_EVEX_M(insn->vex_prefix.bytes[1]); } static inline insn_byte_t insn_vex_p_bits(struct insn *insn) { if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ return X86_VEX_P(insn->vex_prefix.bytes[1]); else return X86_VEX_P(insn->vex_prefix.bytes[2]); } static inline insn_byte_t insn_vex_w_bit(struct insn *insn) { if (insn->vex_prefix.nbytes < 3) return 0; return X86_VEX_W(insn->vex_prefix.bytes[2]); } /* Get the last prefix id from 
last prefix or VEX prefix */ static inline int insn_last_prefix_id(struct insn *insn) { if (insn_is_avx(insn)) return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ if (insn->prefixes.bytes[3]) return inat_get_last_prefix_id(insn->prefixes.bytes[3]); return 0; } /* Offset of each field from kaddr */ static inline int insn_offset_rex_prefix(struct insn *insn) { return insn->prefixes.nbytes; } static inline int insn_offset_vex_prefix(struct insn *insn) { return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; } static inline int insn_offset_opcode(struct insn *insn) { return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; } static inline int insn_offset_modrm(struct insn *insn) { return insn_offset_opcode(insn) + insn->opcode.nbytes; } static inline int insn_offset_sib(struct insn *insn) { return insn_offset_modrm(insn) + insn->modrm.nbytes; } static inline int insn_offset_displacement(struct insn *insn) { return insn_offset_sib(insn) + insn->sib.nbytes; } static inline int insn_offset_immediate(struct insn *insn) { return insn_offset_displacement(insn) + insn->displacement.nbytes; } /** * for_each_insn_prefix() -- Iterate prefixes in the instruction * @insn: Pointer to struct insn. * @idx: Index storage. * @prefix: Prefix byte. * * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix * and the index is stored in @idx (note that this @idx is just for a cursor, * do not change it.) * Since prefixes.nbytes can be bigger than 4 if some prefixes * are repeated, it cannot be used for looping over the prefixes. */ #define for_each_insn_prefix(insn, idx, prefix) \ for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e /* * Intel SDM Vol.3A 6.8.3 states; * "Any single-step trap that would be delivered following the MOV to SS * instruction or POP to SS instruction (because EFLAGS.TF is 1) is * suppressed." * This function returns true if @insn is MOV SS or POP SS. On these * instructions, single stepping is suppressed. */ static inline int insn_masking_exception(struct insn *insn) { return insn->opcode.bytes[0] == POP_SS_OPCODE || (insn->opcode.bytes[0] == MOV_SREG_OPCODE && X86_MODRM_REG(insn->modrm.bytes[0]) == 2); } #endif /* _ASM_X86_INSN_H */
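As a usage sketch for the decoder interface declared above (assuming the in-kernel implementation, where insn_decode() returns 0 on success and a negative errno on failure), the fragment below decodes one instruction from a small buffer; the byte pattern is arbitrary example input.

#include <asm/insn.h>

static int example_decode(void)
{
	/* "push %rbp; mov %rsp,%rbp" - only the first instruction is decoded. */
	const unsigned char buf[MAX_INSN_SIZE] = { 0x55, 0x48, 0x89, 0xe5 };
	struct insn insn;
	int ret;

	ret = insn_decode(&insn, buf, sizeof(buf), INSN_MODE_64);
	if (ret < 0)
		return ret;

	/* insn.length, insn.opcode.bytes[] and the ModRM/SIB fields are valid now. */
	return insn.length;	/* 1 for "push %rbp" */
}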
/* * POSIX message queues filesystem for Linux. * * Copyright (C) 2003,2004 Krzysztof Benedyczak (golbi@mat.uni.torun.pl) * Michal Wronski (michal.wronski@gmail.com) * * Spinlocks: Mohamed Abbas (abbas.mohamed@intel.com) * Lockless receive & send, fd based notify: * Manfred Spraul (manfred@colorfullife.com) * * Audit: George Wilson (ltcgcw@us.ibm.com) * * This file is released under the GPL. */ #include <linux/capability.h> #include <linux/init.h> #include <linux/pagemap.h> #include <linux/file.h> #include <linux/mount.h> #include <linux/fs_context.h> #include <linux/namei.h> #include <linux/sysctl.h> #include <linux/poll.h> #include <linux/mqueue.h> #include <linux/msg.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <linux/netlink.h> #include <linux/syscalls.h> #include <linux/audit.h> #include <linux/signal.h> #include <linux/mutex.h> #include <linux/nsproxy.h> #include <linux/pid.h> #include <linux/ipc_namespace.h> #include <linux/user_namespace.h> #include <linux/slab.h> #include <linux/sched/wake_q.h> #include <linux/sched/signal.h> #include <linux/sched/user.h> #include <net/sock.h> #include "util.h" struct mqueue_fs_context { struct ipc_namespace *ipc_ns; bool newns; /* Set if newly created ipc namespace */ }; #define MQUEUE_MAGIC 0x19800202 #define DIRENT_SIZE 20 #define FILENT_SIZE 80 #define SEND 0 #define RECV 1 #define STATE_NONE 0 #define STATE_READY 1 struct posix_msg_tree_node { struct rb_node rb_node; struct list_head msg_list; int priority; }; /* * Locking: * * Accesses to a message queue are synchronized by acquiring info->lock. * * There are two notable exceptions: * - The actual wakeup of a sleeping task is performed using the wake_q * framework. info->lock is already released when wake_up_q is called. * - The exit codepaths after sleeping check ext_wait_queue->state without * any locks. If it is STATE_READY, then the syscall is completed without * acquiring info->lock. * * MQ_BARRIER: * To achieve proper release/acquire memory barrier pairing, the state is set to * STATE_READY with smp_store_release(), and it is read with READ_ONCE followed * by smp_acquire__after_ctrl_dep(). In addition, wake_q_add_safe() is used.
* * This prevents the following races: * * 1) With the simple wake_q_add(), the task could be gone already before * the increase of the reference happens * Thread A * Thread B * WRITE_ONCE(wait.state, STATE_NONE); * schedule_hrtimeout() * wake_q_add(A) * if (cmpxchg()) // success * ->state = STATE_READY (reordered) * <timeout returns> * if (wait.state == STATE_READY) return; * sysret to user space * sys_exit() * get_task_struct() // UaF * * Solution: Use wake_q_add_safe() and perform the get_task_struct() before * the smp_store_release() that does ->state = STATE_READY. * * 2) Without proper _release/_acquire barriers, the woken up task * could read stale data * * Thread A * Thread B * do_mq_timedreceive * WRITE_ONCE(wait.state, STATE_NONE); * schedule_hrtimeout() * state = STATE_READY; * <timeout returns> * if (wait.state == STATE_READY) return; * msg_ptr = wait.msg; // Access to stale data! * receiver->msg = message; (reordered) * * Solution: use _release and _acquire barriers. * * 3) There is intentionally no barrier when setting current->state * to TASK_INTERRUPTIBLE: spin_unlock(&info->lock) provides the * release memory barrier, and the wakeup is triggered when holding * info->lock, i.e. spin_lock(&info->lock) provided a pairing * acquire memory barrier. */ struct ext_wait_queue { /* queue of sleeping tasks */ struct task_struct *task; struct list_head list; struct msg_msg *msg; /* ptr of loaded message */ int state; /* one of STATE_* values */ }; struct mqueue_inode_info { spinlock_t lock; struct inode vfs_inode; wait_queue_head_t wait_q; struct rb_root msg_tree; struct rb_node *msg_tree_rightmost; struct posix_msg_tree_node *node_cache; struct mq_attr attr; struct sigevent notify; struct pid *notify_owner; u32 notify_self_exec_id; struct user_namespace *notify_user_ns; struct ucounts *ucounts; /* user who created, for accounting */ struct sock *notify_sock; struct sk_buff *notify_cookie; /* for tasks waiting for free space and messages, respectively */ struct ext_wait_queue e_wait_q[2]; unsigned long qsize; /* size of queue in memory (sum of all msgs) */ }; static struct file_system_type mqueue_fs_type; static const struct inode_operations mqueue_dir_inode_operations; static const struct file_operations mqueue_file_operations; static const struct super_operations mqueue_super_ops; static const struct fs_context_operations mqueue_fs_context_ops; static void remove_notification(struct mqueue_inode_info *info); static struct kmem_cache *mqueue_inode_cachep; static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode) { return container_of(inode, struct mqueue_inode_info, vfs_inode); } /* * This routine should be called with the mq_lock held. 
*/ static inline struct ipc_namespace *__get_ns_from_inode(struct inode *inode) { return get_ipc_ns(inode->i_sb->s_fs_info); } static struct ipc_namespace *get_ns_from_inode(struct inode *inode) { struct ipc_namespace *ns; spin_lock(&mq_lock); ns = __get_ns_from_inode(inode); spin_unlock(&mq_lock); return ns; } /* Auxiliary functions to manipulate messages' list */ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info) { struct rb_node **p, *parent = NULL; struct posix_msg_tree_node *leaf; bool rightmost = true; p = &info->msg_tree.rb_node; while (*p) { parent = *p; leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node); if (likely(leaf->priority == msg->m_type)) goto insert_msg; else if (msg->m_type < leaf->priority) { p = &(*p)->rb_left; rightmost = false; } else p = &(*p)->rb_right; } if (info->node_cache) { leaf = info->node_cache; info->node_cache = NULL; } else { leaf = kmalloc(sizeof(*leaf), GFP_ATOMIC); if (!leaf) return -ENOMEM; INIT_LIST_HEAD(&leaf->msg_list); } leaf->priority = msg->m_type; if (rightmost) info->msg_tree_rightmost = &leaf->rb_node; rb_link_node(&leaf->rb_node, parent, p); rb_insert_color(&leaf->rb_node, &info->msg_tree); insert_msg: info->attr.mq_curmsgs++; info->qsize += msg->m_ts; list_add_tail(&msg->m_list, &leaf->msg_list); return 0; } static inline void msg_tree_erase(struct posix_msg_tree_node *leaf, struct mqueue_inode_info *info) { struct rb_node *node = &leaf->rb_node; if (info->msg_tree_rightmost == node) info->msg_tree_rightmost = rb_prev(node); rb_erase(node, &info->msg_tree); if (info->node_cache) kfree(leaf); else info->node_cache = leaf; } static inline struct msg_msg *msg_get(struct mqueue_inode_info *info) { struct rb_node *parent = NULL; struct posix_msg_tree_node *leaf; struct msg_msg *msg; try_again: /* * During insert, low priorities go to the left and high to the * right. On receive, we want the highest priorities first, so * walk all the way to the right. 
*/ parent = info->msg_tree_rightmost; if (!parent) { if (info->attr.mq_curmsgs) { pr_warn_once("Inconsistency in POSIX message queue, " "no tree element, but supposedly messages " "should exist!\n"); info->attr.mq_curmsgs = 0; } return NULL; } leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node); if (unlikely(list_empty(&leaf->msg_list))) { pr_warn_once("Inconsistency in POSIX message queue, " "empty leaf node but we haven't implemented " "lazy leaf delete!\n"); msg_tree_erase(leaf, info); goto try_again; } else { msg = list_first_entry(&leaf->msg_list, struct msg_msg, m_list); list_del(&msg->m_list); if (list_empty(&leaf->msg_list)) { msg_tree_erase(leaf, info); } } info->attr.mq_curmsgs--; info->qsize -= msg->m_ts; return msg; } static struct inode *mqueue_get_inode(struct super_block *sb, struct ipc_namespace *ipc_ns, umode_t mode, struct mq_attr *attr) { struct inode *inode; int ret = -ENOMEM; inode = new_inode(sb); if (!inode) goto err; inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); simple_inode_init_ts(inode); if (S_ISREG(mode)) { struct mqueue_inode_info *info; unsigned long mq_bytes, mq_treesize; inode->i_fop = &mqueue_file_operations; inode->i_size = FILENT_SIZE; /* mqueue specific info */ info = MQUEUE_I(inode); spin_lock_init(&info->lock); init_waitqueue_head(&info->wait_q); INIT_LIST_HEAD(&info->e_wait_q[0].list); INIT_LIST_HEAD(&info->e_wait_q[1].list); info->notify_owner = NULL; info->notify_user_ns = NULL; info->qsize = 0; info->ucounts = NULL; /* set when all is ok */ info->msg_tree = RB_ROOT; info->msg_tree_rightmost = NULL; info->node_cache = NULL; memset(&info->attr, 0, sizeof(info->attr)); info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max, ipc_ns->mq_msg_default); info->attr.mq_msgsize = min(ipc_ns->mq_msgsize_max, ipc_ns->mq_msgsize_default); if (attr) { info->attr.mq_maxmsg = attr->mq_maxmsg; info->attr.mq_msgsize = attr->mq_msgsize; } /* * We used to allocate a static array of pointers and account * the size of that array as well as one msg_msg struct per * possible message into the queue size. That's no longer * accurate as the queue is now an rbtree and will grow and * shrink depending on usage patterns. We can, however, still * account one msg_msg struct per message, but the nodes are * allocated depending on priority usage, and most programs * only use one, or a handful, of priorities. However, since * this is pinned memory, we need to assume worst case, so * that means the min(mq_maxmsg, max_priorities) * struct * posix_msg_tree_node. 
*/ ret = -EINVAL; if (info->attr.mq_maxmsg <= 0 || info->attr.mq_msgsize <= 0) goto out_inode; if (capable(CAP_SYS_RESOURCE)) { if (info->attr.mq_maxmsg > HARD_MSGMAX || info->attr.mq_msgsize > HARD_MSGSIZEMAX) goto out_inode; } else { if (info->attr.mq_maxmsg > ipc_ns->mq_msg_max || info->attr.mq_msgsize > ipc_ns->mq_msgsize_max) goto out_inode; } ret = -EOVERFLOW; /* check for overflow */ if (info->attr.mq_msgsize > ULONG_MAX/info->attr.mq_maxmsg) goto out_inode; mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * sizeof(struct posix_msg_tree_node); mq_bytes = info->attr.mq_maxmsg * info->attr.mq_msgsize; if (mq_bytes + mq_treesize < mq_bytes) goto out_inode; mq_bytes += mq_treesize; info->ucounts = get_ucounts(current_ucounts()); if (info->ucounts) { long msgqueue; spin_lock(&mq_lock); msgqueue = inc_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes); if (msgqueue == LONG_MAX || msgqueue > rlimit(RLIMIT_MSGQUEUE)) { dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes); spin_unlock(&mq_lock); put_ucounts(info->ucounts); info->ucounts = NULL; /* mqueue_evict_inode() releases info->messages */ ret = -EMFILE; goto out_inode; } spin_unlock(&mq_lock); } } else if (S_ISDIR(mode)) { inc_nlink(inode); /* Some things misbehave if size == 0 on a directory */ inode->i_size = 2 * DIRENT_SIZE; inode->i_op = &mqueue_dir_inode_operations; inode->i_fop = &simple_dir_operations; } return inode; out_inode: iput(inode); err: return ERR_PTR(ret); } static int mqueue_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; struct ipc_namespace *ns = sb->s_fs_info; sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = MQUEUE_MAGIC; sb->s_op = &mqueue_super_ops; inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); sb->s_root = d_make_root(inode); if (!sb->s_root) return -ENOMEM; return 0; } static int mqueue_get_tree(struct fs_context *fc) { struct mqueue_fs_context *ctx = fc->fs_private; /* * With a newly created ipc namespace, we don't need to do a search * for an ipc namespace match, but we still need to set s_fs_info. */ if (ctx->newns) { fc->s_fs_info = ctx->ipc_ns; return get_tree_nodev(fc, mqueue_fill_super); } return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns); } static void mqueue_fs_context_free(struct fs_context *fc) { struct mqueue_fs_context *ctx = fc->fs_private; put_ipc_ns(ctx->ipc_ns); kfree(ctx); } static int mqueue_init_fs_context(struct fs_context *fc) { struct mqueue_fs_context *ctx; ctx = kzalloc(sizeof(struct mqueue_fs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns); put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns); fc->fs_private = ctx; fc->ops = &mqueue_fs_context_ops; return 0; } /* * mq_init_ns() is currently the only caller of mq_create_mount(). * So the ns parameter is always a newly created ipc namespace. 
*/ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns) { struct mqueue_fs_context *ctx; struct fs_context *fc; struct vfsmount *mnt; fc = fs_context_for_mount(&mqueue_fs_type, SB_KERNMOUNT); if (IS_ERR(fc)) return ERR_CAST(fc); ctx = fc->fs_private; ctx->newns = true; put_ipc_ns(ctx->ipc_ns); ctx->ipc_ns = get_ipc_ns(ns); put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns); mnt = fc_mount(fc); put_fs_context(fc); return mnt; } static void init_once(void *foo) { struct mqueue_inode_info *p = foo; inode_init_once(&p->vfs_inode); } static struct inode *mqueue_alloc_inode(struct super_block *sb) { struct mqueue_inode_info *ei; ei = alloc_inode_sb(sb, mqueue_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; } static void mqueue_free_inode(struct inode *inode) { kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode)); } static void mqueue_evict_inode(struct inode *inode) { struct mqueue_inode_info *info; struct ipc_namespace *ipc_ns; struct msg_msg *msg, *nmsg; LIST_HEAD(tmp_msg); clear_inode(inode); if (S_ISDIR(inode->i_mode)) return; ipc_ns = get_ns_from_inode(inode); info = MQUEUE_I(inode); spin_lock(&info->lock); while ((msg = msg_get(info)) != NULL) list_add_tail(&msg->m_list, &tmp_msg); kfree(info->node_cache); spin_unlock(&info->lock); list_for_each_entry_safe(msg, nmsg, &tmp_msg, m_list) { list_del(&msg->m_list); free_msg(msg); } if (info->ucounts) { unsigned long mq_bytes, mq_treesize; /* Total amount of bytes accounted for the mqueue */ mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * sizeof(struct posix_msg_tree_node); mq_bytes = mq_treesize + (info->attr.mq_maxmsg * info->attr.mq_msgsize); spin_lock(&mq_lock); dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes); /* * get_ns_from_inode() ensures that the * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns * to which we now hold a reference, or it is NULL. * We can't put it here under mq_lock, though. */ if (ipc_ns) ipc_ns->mq_queues_count--; spin_unlock(&mq_lock); put_ucounts(info->ucounts); info->ucounts = NULL; } if (ipc_ns) put_ipc_ns(ipc_ns); } static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg) { struct inode *dir = dentry->d_parent->d_inode; struct inode *inode; struct mq_attr *attr = arg; int error; struct ipc_namespace *ipc_ns; spin_lock(&mq_lock); ipc_ns = __get_ns_from_inode(dir); if (!ipc_ns) { error = -EACCES; goto out_unlock; } if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max && !capable(CAP_SYS_RESOURCE)) { error = -ENOSPC; goto out_unlock; } ipc_ns->mq_queues_count++; spin_unlock(&mq_lock); inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr); if (IS_ERR(inode)) { error = PTR_ERR(inode); spin_lock(&mq_lock); ipc_ns->mq_queues_count--; goto out_unlock; } put_ipc_ns(ipc_ns); dir->i_size += DIRENT_SIZE; simple_inode_init_ts(dir); d_instantiate(dentry, inode); dget(dentry); return 0; out_unlock: spin_unlock(&mq_lock); if (ipc_ns) put_ipc_ns(ipc_ns); return error; } static int mqueue_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return mqueue_create_attr(dentry, mode, NULL); } static int mqueue_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); simple_inode_init_ts(dir); dir->i_size -= DIRENT_SIZE; drop_nlink(inode); dput(dentry); return 0; } /* * This is routine for system read from queue file. 
* To avoid mess with doing here some sort of mq_receive we allow * to read only queue size & notification info (the only values * that are interesting from user point of view and aren't accessible * through std routines) */ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data, size_t count, loff_t *off) { struct inode *inode = file_inode(filp); struct mqueue_inode_info *info = MQUEUE_I(inode); char buffer[FILENT_SIZE]; ssize_t ret; spin_lock(&info->lock); snprintf(buffer, sizeof(buffer), "QSIZE:%-10lu NOTIFY:%-5d SIGNO:%-5d NOTIFY_PID:%-6d\n", info->qsize, info->notify_owner ? info->notify.sigev_notify : 0, (info->notify_owner && info->notify.sigev_notify == SIGEV_SIGNAL) ? info->notify.sigev_signo : 0, pid_vnr(info->notify_owner)); spin_unlock(&info->lock); buffer[sizeof(buffer)-1] = '\0'; ret = simple_read_from_buffer(u_data, count, off, buffer, strlen(buffer)); if (ret <= 0) return ret; inode_set_atime_to_ts(inode, inode_set_ctime_current(inode)); return ret; } static int mqueue_flush_file(struct file *filp, fl_owner_t id) { struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp)); spin_lock(&info->lock); if (task_tgid(current) == info->notify_owner) remove_notification(info); spin_unlock(&info->lock); return 0; } static __poll_t mqueue_poll_file(struct file *filp, struct poll_table_struct *poll_tab) { struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp)); __poll_t retval = 0; poll_wait(filp, &info->wait_q, poll_tab); spin_lock(&info->lock); if (info->attr.mq_curmsgs) retval = EPOLLIN | EPOLLRDNORM; if (info->attr.mq_curmsgs < info->attr.mq_maxmsg) retval |= EPOLLOUT | EPOLLWRNORM; spin_unlock(&info->lock); return retval; } /* Adds current to info->e_wait_q[sr] before element with smaller prio */ static void wq_add(struct mqueue_inode_info *info, int sr, struct ext_wait_queue *ewp) { struct ext_wait_queue *walk; list_for_each_entry(walk, &info->e_wait_q[sr].list, list) { if (walk->task->prio <= current->prio) { list_add_tail(&ewp->list, &walk->list); return; } } list_add_tail(&ewp->list, &info->e_wait_q[sr].list); } /* * Puts current task to sleep. Caller must hold queue lock. After return * lock isn't held. 
* sr: SEND or RECV */ static int wq_sleep(struct mqueue_inode_info *info, int sr, ktime_t *timeout, struct ext_wait_queue *ewp) __releases(&info->lock) { int retval; signed long time; wq_add(info, sr, ewp); for (;;) { /* memory barrier not required, we hold info->lock */ __set_current_state(TASK_INTERRUPTIBLE); spin_unlock(&info->lock); time = schedule_hrtimeout_range_clock(timeout, 0, HRTIMER_MODE_ABS, CLOCK_REALTIME); if (READ_ONCE(ewp->state) == STATE_READY) { /* see MQ_BARRIER for purpose/pairing */ smp_acquire__after_ctrl_dep(); retval = 0; goto out; } spin_lock(&info->lock); /* we hold info->lock, so no memory barrier required */ if (READ_ONCE(ewp->state) == STATE_READY) { retval = 0; goto out_unlock; } if (signal_pending(current)) { retval = -ERESTARTSYS; break; } if (time == 0) { retval = -ETIMEDOUT; break; } } list_del(&ewp->list); out_unlock: spin_unlock(&info->lock); out: return retval; } /* * Returns waiting task that should be serviced first or NULL if none exists */ static struct ext_wait_queue *wq_get_first_waiter( struct mqueue_inode_info *info, int sr) { struct list_head *ptr; ptr = info->e_wait_q[sr].list.prev; if (ptr == &info->e_wait_q[sr].list) return NULL; return list_entry(ptr, struct ext_wait_queue, list); } static inline void set_cookie(struct sk_buff *skb, char code) { ((char *)skb->data)[NOTIFY_COOKIE_LEN-1] = code; } /* * The next function is only to split too long sys_mq_timedsend */ static void __do_notify(struct mqueue_inode_info *info) { /* notification * invoked when there is registered process and there isn't process * waiting synchronously for message AND state of queue changed from * empty to not empty. Here we are sure that no one is waiting * synchronously. */ if (info->notify_owner && info->attr.mq_curmsgs == 1) { switch (info->notify.sigev_notify) { case SIGEV_NONE: break; case SIGEV_SIGNAL: { struct kernel_siginfo sig_i; struct task_struct *task; /* do_mq_notify() accepts sigev_signo == 0, why?? */ if (!info->notify.sigev_signo) break; clear_siginfo(&sig_i); sig_i.si_signo = info->notify.sigev_signo; sig_i.si_errno = 0; sig_i.si_code = SI_MESGQ; sig_i.si_value = info->notify.sigev_value; rcu_read_lock(); /* map current pid/uid into info->owner's namespaces */ sig_i.si_pid = task_tgid_nr_ns(current, ns_of_pid(info->notify_owner)); sig_i.si_uid = from_kuid_munged(info->notify_user_ns, current_uid()); /* * We can't use kill_pid_info(), this signal should * bypass check_kill_permission(). It is from kernel * but si_fromuser() can't know this. * We do check the self_exec_id, to avoid sending * signals to programs that don't expect them. 
*/ task = pid_task(info->notify_owner, PIDTYPE_TGID); if (task && task->self_exec_id == info->notify_self_exec_id) { do_send_sig_info(info->notify.sigev_signo, &sig_i, task, PIDTYPE_TGID); } rcu_read_unlock(); break; } case SIGEV_THREAD: set_cookie(info->notify_cookie, NOTIFY_WOKENUP); netlink_sendskb(info->notify_sock, info->notify_cookie); break; } /* after notification unregisters process */ put_pid(info->notify_owner); put_user_ns(info->notify_user_ns); info->notify_owner = NULL; info->notify_user_ns = NULL; } wake_up(&info->wait_q); } static int prepare_timeout(const struct __kernel_timespec __user *u_abs_timeout, struct timespec64 *ts) { if (get_timespec64(ts, u_abs_timeout)) return -EFAULT; if (!timespec64_valid(ts)) return -EINVAL; return 0; } static void remove_notification(struct mqueue_inode_info *info) { if (info->notify_owner != NULL && info->notify.sigev_notify == SIGEV_THREAD) { set_cookie(info->notify_cookie, NOTIFY_REMOVED); netlink_sendskb(info->notify_sock, info->notify_cookie); } put_pid(info->notify_owner); put_user_ns(info->notify_user_ns); info->notify_owner = NULL; info->notify_user_ns = NULL; } static int prepare_open(struct dentry *dentry, int oflag, int ro, umode_t mode, struct filename *name, struct mq_attr *attr) { static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, MAY_READ | MAY_WRITE }; int acc; if (d_really_is_negative(dentry)) { if (!(oflag & O_CREAT)) return -ENOENT; if (ro) return ro; audit_inode_parent_hidden(name, dentry->d_parent); return vfs_mkobj(dentry, mode & ~current_umask(), mqueue_create_attr, attr); } /* it already existed */ audit_inode(name, dentry, 0); if ((oflag & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) return -EEXIST; if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) return -EINVAL; acc = oflag2acc[oflag & O_ACCMODE]; return inode_permission(&nop_mnt_idmap, d_inode(dentry), acc); } static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, struct mq_attr *attr) { struct vfsmount *mnt = current->nsproxy->ipc_ns->mq_mnt; struct dentry *root = mnt->mnt_root; struct filename *name; struct path path; int fd, error; int ro; audit_mq_open(oflag, mode, attr); name = getname(u_name); if (IS_ERR(name)) return PTR_ERR(name); fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) goto out_putname; ro = mnt_want_write(mnt); /* we'll drop it in any case */ inode_lock(d_inode(root)); path.dentry = lookup_noperm(&QSTR(name->name), root); if (IS_ERR(path.dentry)) { error = PTR_ERR(path.dentry); goto out_putfd; } path.mnt = mntget(mnt); error = prepare_open(path.dentry, oflag, ro, mode, name, attr); if (!error) { struct file *file = dentry_open(&path, oflag, current_cred()); if (!IS_ERR(file)) fd_install(fd, file); else error = PTR_ERR(file); } path_put(&path); out_putfd: if (error) { put_unused_fd(fd); fd = error; } inode_unlock(d_inode(root)); if (!ro) mnt_drop_write(mnt); out_putname: putname(name); return fd; } SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, struct mq_attr __user *, u_attr) { struct mq_attr attr; if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr))) return -EFAULT; return do_mq_open(u_name, oflag, mode, u_attr ? 
&attr : NULL); } SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) { int err; struct filename *name; struct dentry *dentry; struct inode *inode = NULL; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; struct vfsmount *mnt = ipc_ns->mq_mnt; name = getname(u_name); if (IS_ERR(name)) return PTR_ERR(name); audit_inode_parent_hidden(name, mnt->mnt_root); err = mnt_want_write(mnt); if (err) goto out_name; inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT); dentry = lookup_noperm(&QSTR(name->name), mnt->mnt_root); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); goto out_unlock; } inode = d_inode(dentry); if (!inode) { err = -ENOENT; } else { ihold(inode); err = vfs_unlink(&nop_mnt_idmap, d_inode(dentry->d_parent), dentry, NULL); } dput(dentry); out_unlock: inode_unlock(d_inode(mnt->mnt_root)); iput(inode); mnt_drop_write(mnt); out_name: putname(name); return err; } /* Pipelined send and receive functions. * * If a receiver finds no waiting message, then it registers itself in the * list of waiting receivers. A sender checks that list before adding the new * message into the message array. If there is a waiting receiver, then it * bypasses the message array and directly hands the message over to the * receiver. The receiver accepts the message and returns without grabbing the * queue spinlock: * * - Set pointer to message. * - Queue the receiver task for later wakeup (without the info->lock). * - Update its state to STATE_READY. Now the receiver can continue. * - Wake up the process after the lock is dropped. Should the process wake up * before this wakeup (due to a timeout or a signal) it will either see * STATE_READY and continue or acquire the lock to check the state again. * * The same algorithm is used for senders. */ static inline void __pipelined_op(struct wake_q_head *wake_q, struct mqueue_inode_info *info, struct ext_wait_queue *this) { struct task_struct *task; list_del(&this->list); task = get_task_struct(this->task); /* see MQ_BARRIER for purpose/pairing */ smp_store_release(&this->state, STATE_READY); wake_q_add_safe(wake_q, task); } /* pipelined_send() - send a message directly to the task waiting in * sys_mq_timedreceive() (without inserting message into a queue). */ static inline void pipelined_send(struct wake_q_head *wake_q, struct mqueue_inode_info *info, struct msg_msg *message, struct ext_wait_queue *receiver) { receiver->msg = message; __pipelined_op(wake_q, info, receiver); } /* pipelined_receive() - if there is task waiting in sys_mq_timedsend() * gets its message and put to the queue (we have one free place for sure). 
*/ static inline void pipelined_receive(struct wake_q_head *wake_q, struct mqueue_inode_info *info) { struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND); if (!sender) { /* for poll */ wake_up_interruptible(&info->wait_q); return; } if (msg_insert(sender->msg, info)) return; __pipelined_op(wake_q, info, sender); } static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr, size_t msg_len, unsigned int msg_prio, struct timespec64 *ts) { struct inode *inode; struct ext_wait_queue wait; struct ext_wait_queue *receiver; struct msg_msg *msg_ptr; struct mqueue_inode_info *info; ktime_t expires, *timeout = NULL; struct posix_msg_tree_node *new_leaf = NULL; int ret = 0; DEFINE_WAKE_Q(wake_q); if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX)) return -EINVAL; if (ts) { expires = timespec64_to_ktime(*ts); timeout = &expires; } audit_mq_sendrecv(mqdes, msg_len, msg_prio, ts); CLASS(fd, f)(mqdes); if (fd_empty(f)) return -EBADF; inode = file_inode(fd_file(f)); if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) return -EBADF; info = MQUEUE_I(inode); audit_file(fd_file(f)); if (unlikely(!(fd_file(f)->f_mode & FMODE_WRITE))) return -EBADF; if (unlikely(msg_len > info->attr.mq_msgsize)) return -EMSGSIZE; /* First try to allocate memory, before doing anything with * existing queues. */ msg_ptr = load_msg(u_msg_ptr, msg_len); if (IS_ERR(msg_ptr)) return PTR_ERR(msg_ptr); msg_ptr->m_ts = msg_len; msg_ptr->m_type = msg_prio; /* * msg_insert really wants us to have a valid, spare node struct so * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will * fall back to that if necessary. */ if (!info->node_cache) new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL); spin_lock(&info->lock); if (!info->node_cache && new_leaf) { /* Save our speculative allocation into the cache */ INIT_LIST_HEAD(&new_leaf->msg_list); info->node_cache = new_leaf; new_leaf = NULL; } else { kfree(new_leaf); } if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) { if (fd_file(f)->f_flags & O_NONBLOCK) { ret = -EAGAIN; } else { wait.task = current; wait.msg = (void *) msg_ptr; /* memory barrier not required, we hold info->lock */ WRITE_ONCE(wait.state, STATE_NONE); ret = wq_sleep(info, SEND, timeout, &wait); /* * wq_sleep must be called with info->lock held, and * returns with the lock released */ goto out_free; } } else { receiver = wq_get_first_waiter(info, RECV); if (receiver) { pipelined_send(&wake_q, info, msg_ptr, receiver); } else { /* adds message to the queue */ ret = msg_insert(msg_ptr, info); if (ret) goto out_unlock; __do_notify(info); } simple_inode_init_ts(inode); } out_unlock: spin_unlock(&info->lock); wake_up_q(&wake_q); out_free: if (ret) free_msg(msg_ptr); return ret; } static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, size_t msg_len, unsigned int __user *u_msg_prio, struct timespec64 *ts) { ssize_t ret; struct msg_msg *msg_ptr; struct inode *inode; struct mqueue_inode_info *info; struct ext_wait_queue wait; ktime_t expires, *timeout = NULL; struct posix_msg_tree_node *new_leaf = NULL; if (ts) { expires = timespec64_to_ktime(*ts); timeout = &expires; } audit_mq_sendrecv(mqdes, msg_len, 0, ts); CLASS(fd, f)(mqdes); if (fd_empty(f)) return -EBADF; inode = file_inode(fd_file(f)); if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) return -EBADF; info = MQUEUE_I(inode); audit_file(fd_file(f)); if (unlikely(!(fd_file(f)->f_mode & FMODE_READ))) return -EBADF; /* checks if buffer is big enough */ if (unlikely(msg_len < info->attr.mq_msgsize)) return -EMSGSIZE; /* * 
msg_insert really wants us to have a valid, spare node struct so * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will * fall back to that if necessary. */ if (!info->node_cache) new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL); spin_lock(&info->lock); if (!info->node_cache && new_leaf) { /* Save our speculative allocation into the cache */ INIT_LIST_HEAD(&new_leaf->msg_list); info->node_cache = new_leaf; } else { kfree(new_leaf); } if (info->attr.mq_curmsgs == 0) { if (fd_file(f)->f_flags & O_NONBLOCK) { spin_unlock(&info->lock); ret = -EAGAIN; } else { wait.task = current; /* memory barrier not required, we hold info->lock */ WRITE_ONCE(wait.state, STATE_NONE); ret = wq_sleep(info, RECV, timeout, &wait); msg_ptr = wait.msg; } } else { DEFINE_WAKE_Q(wake_q); msg_ptr = msg_get(info); simple_inode_init_ts(inode); /* There is now free space in queue. */ pipelined_receive(&wake_q, info); spin_unlock(&info->lock); wake_up_q(&wake_q); ret = 0; } if (ret == 0) { ret = msg_ptr->m_ts; if ((u_msg_prio && put_user(msg_ptr->m_type, u_msg_prio)) || store_msg(u_msg_ptr, msg_ptr, msg_ptr->m_ts)) { ret = -EFAULT; } free_msg(msg_ptr); } return ret; } SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, size_t, msg_len, unsigned int, msg_prio, const struct __kernel_timespec __user *, u_abs_timeout) { struct timespec64 ts, *p = NULL; if (u_abs_timeout) { int res = prepare_timeout(u_abs_timeout, &ts); if (res) return res; p = &ts; } return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p); } SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, size_t, msg_len, unsigned int __user *, u_msg_prio, const struct __kernel_timespec __user *, u_abs_timeout) { struct timespec64 ts, *p = NULL; if (u_abs_timeout) { int res = prepare_timeout(u_abs_timeout, &ts); if (res) return res; p = &ts; } return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p); } /* * Notes: the case when user wants us to deregister (with NULL as pointer) * and he isn't currently owner of notification, will be silently discarded. * It isn't explicitly defined in the POSIX. */ static int do_mq_notify(mqd_t mqdes, const struct sigevent *notification) { int ret; struct sock *sock; struct inode *inode; struct mqueue_inode_info *info; struct sk_buff *nc; audit_mq_notify(mqdes, notification); nc = NULL; sock = NULL; if (notification != NULL) { if (unlikely(notification->sigev_notify != SIGEV_NONE && notification->sigev_notify != SIGEV_SIGNAL && notification->sigev_notify != SIGEV_THREAD)) return -EINVAL; if (notification->sigev_notify == SIGEV_SIGNAL && !valid_signal(notification->sigev_signo)) { return -EINVAL; } if (notification->sigev_notify == SIGEV_THREAD) { long timeo; /* create the notify skb */ nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL); if (!nc) return -ENOMEM; if (copy_from_user(nc->data, notification->sigev_value.sival_ptr, NOTIFY_COOKIE_LEN)) { kfree_skb(nc); return -EFAULT; } /* TODO: add a header? 
*/ skb_put(nc, NOTIFY_COOKIE_LEN); /* and attach it to the socket */ retry: sock = netlink_getsockbyfd(notification->sigev_signo); if (IS_ERR(sock)) { kfree_skb(nc); return PTR_ERR(sock); } timeo = MAX_SCHEDULE_TIMEOUT; ret = netlink_attachskb(sock, nc, &timeo, NULL); if (ret == 1) goto retry; if (ret) return ret; } } CLASS(fd, f)(mqdes); if (fd_empty(f)) { ret = -EBADF; goto out; } inode = file_inode(fd_file(f)); if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) { ret = -EBADF; goto out; } info = MQUEUE_I(inode); ret = 0; spin_lock(&info->lock); if (notification == NULL) { if (info->notify_owner == task_tgid(current)) { remove_notification(info); inode_set_atime_to_ts(inode, inode_set_ctime_current(inode)); } } else if (info->notify_owner != NULL) { ret = -EBUSY; } else { switch (notification->sigev_notify) { case SIGEV_NONE: info->notify.sigev_notify = SIGEV_NONE; break; case SIGEV_THREAD: info->notify_sock = sock; info->notify_cookie = nc; sock = NULL; nc = NULL; info->notify.sigev_notify = SIGEV_THREAD; break; case SIGEV_SIGNAL: info->notify.sigev_signo = notification->sigev_signo; info->notify.sigev_value = notification->sigev_value; info->notify.sigev_notify = SIGEV_SIGNAL; info->notify_self_exec_id = current->self_exec_id; break; } info->notify_owner = get_pid(task_tgid(current)); info->notify_user_ns = get_user_ns(current_user_ns()); inode_set_atime_to_ts(inode, inode_set_ctime_current(inode)); } spin_unlock(&info->lock); out: if (sock) netlink_detachskb(sock, nc); return ret; } SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, const struct sigevent __user *, u_notification) { struct sigevent n, *p = NULL; if (u_notification) { if (copy_from_user(&n, u_notification, sizeof(struct sigevent))) return -EFAULT; p = &n; } return do_mq_notify(mqdes, p); } static int do_mq_getsetattr(int mqdes, struct mq_attr *new, struct mq_attr *old) { struct inode *inode; struct mqueue_inode_info *info; if (new && (new->mq_flags & (~O_NONBLOCK))) return -EINVAL; CLASS(fd, f)(mqdes); if (fd_empty(f)) return -EBADF; if (unlikely(fd_file(f)->f_op != &mqueue_file_operations)) return -EBADF; inode = file_inode(fd_file(f)); info = MQUEUE_I(inode); spin_lock(&info->lock); if (old) { *old = info->attr; old->mq_flags = fd_file(f)->f_flags & O_NONBLOCK; } if (new) { audit_mq_getsetattr(mqdes, new); spin_lock(&fd_file(f)->f_lock); if (new->mq_flags & O_NONBLOCK) fd_file(f)->f_flags |= O_NONBLOCK; else fd_file(f)->f_flags &= ~O_NONBLOCK; spin_unlock(&fd_file(f)->f_lock); inode_set_atime_to_ts(inode, inode_set_ctime_current(inode)); } spin_unlock(&info->lock); return 0; } SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, const struct mq_attr __user *, u_mqstat, struct mq_attr __user *, u_omqstat) { int ret; struct mq_attr mqstat, omqstat; struct mq_attr *new = NULL, *old = NULL; if (u_mqstat) { new = &mqstat; if (copy_from_user(new, u_mqstat, sizeof(struct mq_attr))) return -EFAULT; } if (u_omqstat) old = &omqstat; ret = do_mq_getsetattr(mqdes, new, old); if (ret || !old) return ret; if (copy_to_user(u_omqstat, old, sizeof(struct mq_attr))) return -EFAULT; return 0; } #ifdef CONFIG_COMPAT struct compat_mq_attr { compat_long_t mq_flags; /* message queue flags */ compat_long_t mq_maxmsg; /* maximum number of messages */ compat_long_t mq_msgsize; /* maximum message size */ compat_long_t mq_curmsgs; /* number of messages currently queued */ compat_long_t __reserved[4]; /* ignored for input, zeroed for output */ }; static inline int get_compat_mq_attr(struct mq_attr *attr, const struct compat_mq_attr __user *uattr) { struct 
compat_mq_attr v; if (copy_from_user(&v, uattr, sizeof(*uattr))) return -EFAULT; memset(attr, 0, sizeof(*attr)); attr->mq_flags = v.mq_flags; attr->mq_maxmsg = v.mq_maxmsg; attr->mq_msgsize = v.mq_msgsize; attr->mq_curmsgs = v.mq_curmsgs; return 0; } static inline int put_compat_mq_attr(const struct mq_attr *attr, struct compat_mq_attr __user *uattr) { struct compat_mq_attr v; memset(&v, 0, sizeof(v)); v.mq_flags = attr->mq_flags; v.mq_maxmsg = attr->mq_maxmsg; v.mq_msgsize = attr->mq_msgsize; v.mq_curmsgs = attr->mq_curmsgs; if (copy_to_user(uattr, &v, sizeof(*uattr))) return -EFAULT; return 0; } COMPAT_SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, compat_mode_t, mode, struct compat_mq_attr __user *, u_attr) { struct mq_attr attr, *p = NULL; if (u_attr && oflag & O_CREAT) { p = &attr; if (get_compat_mq_attr(&attr, u_attr)) return -EFAULT; } return do_mq_open(u_name, oflag, mode, p); } COMPAT_SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, const struct compat_sigevent __user *, u_notification) { struct sigevent n, *p = NULL; if (u_notification) { if (get_compat_sigevent(&n, u_notification)) return -EFAULT; if (n.sigev_notify == SIGEV_THREAD) n.sigev_value.sival_ptr = compat_ptr(n.sigev_value.sival_int); p = &n; } return do_mq_notify(mqdes, p); } COMPAT_SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, const struct compat_mq_attr __user *, u_mqstat, struct compat_mq_attr __user *, u_omqstat) { int ret; struct mq_attr mqstat, omqstat; struct mq_attr *new = NULL, *old = NULL; if (u_mqstat) { new = &mqstat; if (get_compat_mq_attr(new, u_mqstat)) return -EFAULT; } if (u_omqstat) old = &omqstat; ret = do_mq_getsetattr(mqdes, new, old); if (ret || !old) return ret; if (put_compat_mq_attr(old, u_omqstat)) return -EFAULT; return 0; } #endif #ifdef CONFIG_COMPAT_32BIT_TIME static int compat_prepare_timeout(const struct old_timespec32 __user *p, struct timespec64 *ts) { if (get_old_timespec32(ts, p)) return -EFAULT; if (!timespec64_valid(ts)) return -EINVAL; return 0; } SYSCALL_DEFINE5(mq_timedsend_time32, mqd_t, mqdes, const char __user *, u_msg_ptr, unsigned int, msg_len, unsigned int, msg_prio, const struct old_timespec32 __user *, u_abs_timeout) { struct timespec64 ts, *p = NULL; if (u_abs_timeout) { int res = compat_prepare_timeout(u_abs_timeout, &ts); if (res) return res; p = &ts; } return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p); } SYSCALL_DEFINE5(mq_timedreceive_time32, mqd_t, mqdes, char __user *, u_msg_ptr, unsigned int, msg_len, unsigned int __user *, u_msg_prio, const struct old_timespec32 __user *, u_abs_timeout) { struct timespec64 ts, *p = NULL; if (u_abs_timeout) { int res = compat_prepare_timeout(u_abs_timeout, &ts); if (res) return res; p = &ts; } return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p); } #endif static const struct inode_operations mqueue_dir_inode_operations = { .lookup = simple_lookup, .create = mqueue_create, .unlink = mqueue_unlink, }; static const struct file_operations mqueue_file_operations = { .flush = mqueue_flush_file, .poll = mqueue_poll_file, .read = mqueue_read_file, .llseek = default_llseek, }; static const struct super_operations mqueue_super_ops = { .alloc_inode = mqueue_alloc_inode, .free_inode = mqueue_free_inode, .evict_inode = mqueue_evict_inode, .statfs = simple_statfs, }; static const struct fs_context_operations mqueue_fs_context_ops = { .free = mqueue_fs_context_free, .get_tree = mqueue_get_tree, }; static struct file_system_type mqueue_fs_type = { .name = "mqueue", .init_fs_context = mqueue_init_fs_context, 
.kill_sb = kill_litter_super, .fs_flags = FS_USERNS_MOUNT, }; int mq_init_ns(struct ipc_namespace *ns) { struct vfsmount *m; ns->mq_queues_count = 0; ns->mq_queues_max = DFLT_QUEUESMAX; ns->mq_msg_max = DFLT_MSGMAX; ns->mq_msgsize_max = DFLT_MSGSIZEMAX; ns->mq_msg_default = DFLT_MSG; ns->mq_msgsize_default = DFLT_MSGSIZE; m = mq_create_mount(ns); if (IS_ERR(m)) return PTR_ERR(m); ns->mq_mnt = m; return 0; } void mq_clear_sbinfo(struct ipc_namespace *ns) { ns->mq_mnt->mnt_sb->s_fs_info = NULL; } static int __init init_mqueue_fs(void) { int error; mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache", sizeof(struct mqueue_inode_info), 0, SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, init_once); if (mqueue_inode_cachep == NULL) return -ENOMEM; if (!setup_mq_sysctls(&init_ipc_ns)) { pr_warn("sysctl registration failed\n"); error = -ENOMEM; goto out_kmem; } error = register_filesystem(&mqueue_fs_type); if (error) goto out_sysctl; spin_lock_init(&mq_lock); error = mq_init_ns(&init_ipc_ns); if (error) goto out_filesystem; return 0; out_filesystem: unregister_filesystem(&mqueue_fs_type); out_sysctl: retire_mq_sysctls(&init_ipc_ns); out_kmem: kmem_cache_destroy(mqueue_inode_cachep); return error; } device_initcall(init_mqueue_fs);
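The queue implementation above is what sits behind the POSIX mq_* wrappers in libc. A minimal userspace sketch (the queue name "/demo" and the attribute values are illustrative; link with -lrt on older glibc) shows the calling conventions these syscalls enforce, including the rule checked in do_mq_timedreceive() that the receive buffer must be at least mq_msgsize bytes:

/* Hypothetical userspace use of the mq_* syscalls implemented above. */
#include <fcntl.h>
#include <mqueue.h>
#include <stdio.h>
#include <sys/types.h>

int main(void)
{
	struct mq_attr attr = { .mq_maxmsg = 4, .mq_msgsize = 64 };
	char buf[64];	/* must be >= mq_msgsize or the kernel returns -EMSGSIZE */
	unsigned int prio;
	ssize_t n;

	mqd_t q = mq_open("/demo", O_CREAT | O_RDWR, 0600, &attr);
	if (q == (mqd_t)-1) {
		perror("mq_open");
		return 1;
	}

	/* Priority must be below MQ_PRIO_MAX; higher priorities are received first. */
	mq_send(q, "hello", 5, 1);

	n = mq_receive(q, buf, sizeof(buf), &prio);
	if (n >= 0)
		printf("got %zd bytes at prio %u: %.*s\n", n, prio, (int)n, buf);

	mq_close(q);
	mq_unlink("/demo");
	return 0;
}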
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat * * based in parts on udlfb.c: * Copyright (C) 2009 Roberto De Ioris <roberto@unbit.it> * Copyright (C) 2009 Jaya Kumar <jayakumar.lkml@gmail.com> * Copyright (C) 2009 Bernie Thompson <bernie@plugable.com> */ #include <linux/unaligned.h> #include <drm/drm.h> #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> #include "udl_drv.h" /* -BULK_SIZE as per usb-skeleton. Can we get full page and avoid overhead? */ #define BULK_SIZE 512 #define NR_USB_REQUEST_CHANNEL 0x12 #define MAX_TRANSFER (PAGE_SIZE*16 - BULK_SIZE) #define WRITES_IN_FLIGHT (20) #define MAX_VENDOR_DESCRIPTOR_SIZE 256 #define UDL_SKU_PIXEL_LIMIT_DEFAULT 2080000 static struct urb *udl_get_urb_locked(struct udl_device *udl, long timeout); /* * Try to make sense of whatever we parse. Therefore return @end on * errors, but don't fail hard.
*/ static const u8 *udl_parse_key_value_pair(struct udl_device *udl, const u8 *pos, const u8 *end) { u16 key; u8 len; /* read key */ if (pos >= end - 2) return end; key = get_unaligned_le16(pos); pos += 2; /* read value length */ if (pos >= end - 1) return end; len = *pos++; /* read value */ if (pos >= end - len) return end; switch (key) { case 0x0200: { /* maximum number of pixels */ unsigned int sku_pixel_limit; if (len < sizeof(__le32)) break; sku_pixel_limit = get_unaligned_le32(pos); if (sku_pixel_limit >= 16 * UDL_SKU_PIXEL_LIMIT_DEFAULT) break; /* almost 100 MiB, so probably bogus */ udl->sku_pixel_limit = sku_pixel_limit; break; } default: break; } pos += len; return pos; } static int udl_parse_vendor_descriptor(struct udl_device *udl) { struct drm_device *dev = &udl->drm; struct usb_device *udev = udl_to_usb_device(udl); bool detected = false; void *buf; int ret; unsigned int len; const u8 *desc; const u8 *desc_end; buf = kzalloc(MAX_VENDOR_DESCRIPTOR_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; ret = usb_get_descriptor(udev, 0x5f, /* vendor specific */ 0, buf, MAX_VENDOR_DESCRIPTOR_SIZE); if (ret < 0) goto out; len = ret; if (len < 5) goto out; desc = buf; desc_end = desc + len; if ((desc[0] != len) || /* descriptor length */ (desc[1] != 0x5f) || /* vendor descriptor type */ (desc[2] != 0x01) || /* version (2 bytes) */ (desc[3] != 0x00) || (desc[4] != len - 2)) /* length after type */ goto out; desc += 5; detected = true; while (desc < desc_end) desc = udl_parse_key_value_pair(udl, desc, desc_end); out: if (!detected) drm_warn(dev, "Unrecognized vendor firmware descriptor\n"); kfree(buf); return 0; } /* * Need to ensure a channel is selected before submitting URBs */ int udl_select_std_channel(struct udl_device *udl) { static const u8 set_def_chn[] = {0x57, 0xCD, 0xDC, 0xA7, 0x1C, 0x88, 0x5E, 0x15, 0x60, 0xFE, 0xC6, 0x97, 0x16, 0x3D, 0x47, 0xF2}; void *sendbuf; int ret; struct usb_device *udev = udl_to_usb_device(udl); sendbuf = kmemdup(set_def_chn, sizeof(set_def_chn), GFP_KERNEL); if (!sendbuf) return -ENOMEM; ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), NR_USB_REQUEST_CHANNEL, (USB_DIR_OUT | USB_TYPE_VENDOR), 0, 0, sendbuf, sizeof(set_def_chn), USB_CTRL_SET_TIMEOUT); kfree(sendbuf); return ret < 0 ? 
ret : 0; } void udl_urb_completion(struct urb *urb) { struct urb_node *unode = urb->context; struct udl_device *udl = unode->dev; unsigned long flags; /* sync/async unlink faults aren't errors */ if (urb->status) { if (!(urb->status == -ENOENT || urb->status == -ECONNRESET || urb->status == -EPROTO || urb->status == -ESHUTDOWN)) { DRM_ERROR("%s - nonzero write bulk status received: %d\n", __func__, urb->status); } } urb->transfer_buffer_length = udl->urbs.size; /* reset to actual */ spin_lock_irqsave(&udl->urbs.lock, flags); list_add_tail(&unode->entry, &udl->urbs.list); udl->urbs.available++; spin_unlock_irqrestore(&udl->urbs.lock, flags); wake_up(&udl->urbs.sleep); } static void udl_free_urb_list(struct udl_device *udl) { struct urb_node *unode; struct urb *urb; DRM_DEBUG("Waiting for completes and freeing all render urbs\n"); /* keep waiting and freeing, until we've got 'em all */ while (udl->urbs.count) { spin_lock_irq(&udl->urbs.lock); urb = udl_get_urb_locked(udl, MAX_SCHEDULE_TIMEOUT); udl->urbs.count--; spin_unlock_irq(&udl->urbs.lock); if (WARN_ON(!urb)) break; unode = urb->context; /* Free each separately allocated piece */ usb_free_coherent(urb->dev, udl->urbs.size, urb->transfer_buffer, urb->transfer_dma); usb_free_urb(urb); kfree(unode); } wake_up_all(&udl->urbs.sleep); } static int udl_alloc_urb_list(struct udl_device *udl, int count, size_t size) { struct urb *urb; struct urb_node *unode; char *buf; size_t wanted_size = count * size; struct usb_device *udev = udl_to_usb_device(udl); spin_lock_init(&udl->urbs.lock); INIT_LIST_HEAD(&udl->urbs.list); init_waitqueue_head(&udl->urbs.sleep); udl->urbs.count = 0; udl->urbs.available = 0; retry: udl->urbs.size = size; while (udl->urbs.count * size < wanted_size) { unode = kzalloc(sizeof(struct urb_node), GFP_KERNEL); if (!unode) break; unode->dev = udl; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { kfree(unode); break; } unode->urb = urb; buf = usb_alloc_coherent(udev, size, GFP_KERNEL, &urb->transfer_dma); if (!buf) { kfree(unode); usb_free_urb(urb); if (size > PAGE_SIZE) { size /= 2; udl_free_urb_list(udl); goto retry; } break; } /* urb->transfer_buffer_length set to actual before submit */ usb_fill_bulk_urb(urb, udev, usb_sndbulkpipe(udev, 1), buf, size, udl_urb_completion, unode); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; list_add_tail(&unode->entry, &udl->urbs.list); udl->urbs.count++; udl->urbs.available++; } DRM_DEBUG("allocated %d %d byte urbs\n", udl->urbs.count, (int) size); return udl->urbs.count; } static struct urb *udl_get_urb_locked(struct udl_device *udl, long timeout) { struct urb_node *unode; assert_spin_locked(&udl->urbs.lock); /* Wait for an in-flight buffer to complete and get re-queued */ if (!wait_event_lock_irq_timeout(udl->urbs.sleep, !udl->urbs.count || !list_empty(&udl->urbs.list), udl->urbs.lock, timeout)) { DRM_INFO("wait for urb interrupted: available: %d\n", udl->urbs.available); return NULL; } if (!udl->urbs.count) return NULL; unode = list_first_entry(&udl->urbs.list, struct urb_node, entry); list_del_init(&unode->entry); udl->urbs.available--; return unode->urb; } #define GET_URB_TIMEOUT HZ struct urb *udl_get_urb(struct udl_device *udl) { struct urb *urb; spin_lock_irq(&udl->urbs.lock); urb = udl_get_urb_locked(udl, GET_URB_TIMEOUT); spin_unlock_irq(&udl->urbs.lock); return urb; } int udl_submit_urb(struct udl_device *udl, struct urb *urb, size_t len) { int ret; if (WARN_ON(len > udl->urbs.size)) { ret = -EINVAL; goto error; } urb->transfer_buffer_length = len; /* set to actual payload len 
*/ ret = usb_submit_urb(urb, GFP_ATOMIC); error: if (ret) { udl_urb_completion(urb); /* because no one else will */ DRM_ERROR("usb_submit_urb error %x\n", ret); } return ret; } /* wait until all pending URBs have been processed */ void udl_sync_pending_urbs(struct udl_device *udl) { struct drm_device *dev = &udl->drm; spin_lock_irq(&udl->urbs.lock); /* 2 seconds as a sane timeout */ if (!wait_event_lock_irq_timeout(udl->urbs.sleep, udl->urbs.available == udl->urbs.count, udl->urbs.lock, msecs_to_jiffies(2000))) drm_err(dev, "Timeout for syncing pending URBs\n"); spin_unlock_irq(&udl->urbs.lock); } int udl_init(struct udl_device *udl) { struct drm_device *dev = &udl->drm; int ret = -ENOMEM; struct device *dma_dev; DRM_DEBUG("\n"); dma_dev = usb_intf_get_dma_device(to_usb_interface(dev->dev)); if (dma_dev) { drm_dev_set_dma_dev(dev, dma_dev); put_device(dma_dev); } else { drm_warn(dev, "buffer sharing not supported"); /* not an error */ } /* * Not all devices provide vendor descriptors with device * information. Initialize to default values of real-world * devices. It is just enough memory for FullHD. */ udl->sku_pixel_limit = UDL_SKU_PIXEL_LIMIT_DEFAULT; ret = udl_parse_vendor_descriptor(udl); if (ret) goto err; if (udl_select_std_channel(udl)) DRM_ERROR("Selecting channel failed\n"); if (!udl_alloc_urb_list(udl, WRITES_IN_FLIGHT, MAX_TRANSFER)) { DRM_ERROR("udl_alloc_urb_list failed\n"); ret = -ENOMEM; goto err; } DRM_DEBUG("\n"); ret = udl_modeset_init(udl); if (ret) goto err; return 0; err: if (udl->urbs.count) udl_free_urb_list(udl); DRM_ERROR("%d\n", ret); return ret; } int udl_drop_usb(struct udl_device *udl) { udl_free_urb_list(udl); return 0; }
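The URB pool above is consumed with a simple get/fill/submit pattern: udl_get_urb() may sleep up to GET_URB_TIMEOUT waiting for a free URB, the caller copies its payload into the URB's coherent buffer, and udl_submit_urb() sends it, with udl_urb_completion() returning the URB to the free list. A hypothetical caller (the function name and payload are made up, not part of the driver) would look roughly like this:

/* Sketch of a caller of the URB pool above; illustrative only. */
static int udl_send_example(struct udl_device *udl, const void *cmds, size_t len)
{
	struct urb *urb;

	if (len > udl->urbs.size)	/* udl_submit_urb() would WARN and reject this */
		return -EINVAL;

	urb = udl_get_urb(udl);
	if (!urb)			/* pool exhausted and the wait timed out */
		return -ENOMEM;

	memcpy(urb->transfer_buffer, cmds, len);

	/* On failure udl_submit_urb() already recycles the URB via the completion. */
	return udl_submit_urb(udl, urb, len);
}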
// SPDX-License-Identifier: GPL-2.0 /* * LED Triggers for USB Activity * * Copyright 2014 Michal Sojka <sojka@merica.cz> */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/leds.h> #include <linux/usb.h> #include "common.h" #define BLINK_DELAY 30 DEFINE_LED_TRIGGER(ledtrig_usb_gadget); DEFINE_LED_TRIGGER(ledtrig_usb_host); void usb_led_activity(enum usb_led_event ev) { struct led_trigger *trig = NULL; switch (ev) { case USB_LED_EVENT_GADGET: trig = ledtrig_usb_gadget; break; case USB_LED_EVENT_HOST: trig = ledtrig_usb_host; break; } /* led_trigger_blink_oneshot() handles trig == NULL gracefully */ led_trigger_blink_oneshot(trig, BLINK_DELAY, BLINK_DELAY, 0); } EXPORT_SYMBOL_GPL(usb_led_activity); void __init ledtrig_usb_init(void) { led_trigger_register_simple("usb-gadget", &ledtrig_usb_gadget); led_trigger_register_simple("usb-host", &ledtrig_usb_host); } void __exit ledtrig_usb_exit(void) { led_trigger_unregister_simple(ledtrig_usb_gadget); led_trigger_unregister_simple(ledtrig_usb_host); }
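The two triggers registered above appear under the names "usb-gadget" and "usb-host". A short sketch of how they are consumed; the in-kernel call site below is hypothetical, while the sysfs binding is the standard LED-trigger interface:

/* Hypothetical call site: report one host-side transfer so a bound LED
 * blinks for BLINK_DELAY ms.  led_trigger_blink_oneshot() copes with a
 * still-unregistered (NULL) trigger, so ordering against ledtrig_usb_init()
 * does not matter. */
static inline void example_note_host_traffic(void)
{
	usb_led_activity(USB_LED_EVENT_HOST);
}

/*
 * Userspace then binds any LED to one of the triggers through sysfs, e.g.:
 *
 *   echo usb-host > /sys/class/leds/led0/trigger
 */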
// SPDX-License-Identifier: GPL-2.0-only /* Frontend part of the Linux driver for the WideView/ Yakumo/ Hama/ * Typhoon/ Yuan DVB-T USB2.0 receiver. * * Copyright (C) 2005 Patrick Boettcher <patrick.boettcher@posteo.de> * * see Documentation/driver-api/media/drivers/dvb-usb.rst for more information */ #include "dtt200u.h" struct dtt200u_fe_state { struct dvb_usb_device *d; enum fe_status stat; struct dtv_frontend_properties fep; struct dvb_frontend frontend; unsigned char data[80]; struct mutex data_mutex; }; static int dtt200u_fe_read_status(struct dvb_frontend *fe, enum fe_status *stat) { struct dtt200u_fe_state *state = fe->demodulator_priv; int ret; mutex_lock(&state->data_mutex); state->data[0] = GET_TUNE_STATUS; ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 3, 0); if (ret < 0) { *stat = 0; mutex_unlock(&state->data_mutex); return ret; } switch (state->data[0]) { case 0x01: *stat = FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_VITERBI | FE_HAS_SYNC | FE_HAS_LOCK; break; case 0x00: /* pending */ *stat = FE_TIMEDOUT; /* during set_frontend */ break; default: case 0x02: /* failed */ *stat = 0; break; } mutex_unlock(&state->data_mutex); return 0; } static int dtt200u_fe_read_ber(struct dvb_frontend* fe, u32 *ber) { struct dtt200u_fe_state *state = fe->demodulator_priv; int ret; mutex_lock(&state->data_mutex); state->data[0] = GET_VIT_ERR_CNT; ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 3, 0); if (ret >= 0) *ber = (state->data[0] << 16) | (state->data[1] << 8) | state->data[2]; mutex_unlock(&state->data_mutex); return ret; } static int dtt200u_fe_read_unc_blocks(struct dvb_frontend* fe, u32 *unc) { struct dtt200u_fe_state *state = fe->demodulator_priv; int ret; mutex_lock(&state->data_mutex); state->data[0] = GET_RS_UNCOR_BLK_CNT; ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 2, 0); if (ret >= 0) *unc = (state->data[0] << 8) | state->data[1]; mutex_unlock(&state->data_mutex); return ret; } static int dtt200u_fe_read_signal_strength(struct dvb_frontend* fe, u16 *strength) { struct dtt200u_fe_state *state = fe->demodulator_priv; int ret; mutex_lock(&state->data_mutex); state->data[0] = GET_AGC; ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 1, 0); if (ret >= 0) *strength = (state->data[0] << 8) | state->data[0]; mutex_unlock(&state->data_mutex); return ret; } static int dtt200u_fe_read_snr(struct dvb_frontend* fe, u16 *snr) { struct dtt200u_fe_state *state = fe->demodulator_priv; int ret; mutex_lock(&state->data_mutex); state->data[0] = GET_SNR; ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 1, 0); if
(ret >= 0) *snr = ~((state->data[0] << 8) | state->data[0]); mutex_unlock(&state->data_mutex); return ret; } static int dtt200u_fe_init(struct dvb_frontend* fe) { struct dtt200u_fe_state *state = fe->demodulator_priv; int ret; mutex_lock(&state->data_mutex); state->data[0] = SET_INIT; ret = dvb_usb_generic_write(state->d, state->data, 1); mutex_unlock(&state->data_mutex); return ret; } static int dtt200u_fe_sleep(struct dvb_frontend* fe) { return dtt200u_fe_init(fe); } static int dtt200u_fe_get_tune_settings(struct dvb_frontend* fe, struct dvb_frontend_tune_settings *tune) { tune->min_delay_ms = 1500; tune->step_size = 0; tune->max_drift = 0; return 0; } static int dtt200u_fe_set_frontend(struct dvb_frontend *fe) { struct dtv_frontend_properties *fep = &fe->dtv_property_cache; struct dtt200u_fe_state *state = fe->demodulator_priv; int ret; u16 freq = fep->frequency / 250000; mutex_lock(&state->data_mutex); state->data[0] = SET_BANDWIDTH; switch (fep->bandwidth_hz) { case 8000000: state->data[1] = 8; break; case 7000000: state->data[1] = 7; break; case 6000000: state->data[1] = 6; break; default: ret = -EINVAL; goto ret; } ret = dvb_usb_generic_write(state->d, state->data, 2); if (ret < 0) goto ret; state->data[0] = SET_RF_FREQ; state->data[1] = freq & 0xff; state->data[2] = (freq >> 8) & 0xff; ret = dvb_usb_generic_write(state->d, state->data, 3); if (ret < 0) goto ret; ret: mutex_unlock(&state->data_mutex); return ret; } static int dtt200u_fe_get_frontend(struct dvb_frontend* fe, struct dtv_frontend_properties *fep) { struct dtt200u_fe_state *state = fe->demodulator_priv; memcpy(fep, &state->fep, sizeof(struct dtv_frontend_properties)); return 0; } static void dtt200u_fe_release(struct dvb_frontend* fe) { struct dtt200u_fe_state *state = fe->demodulator_priv; kfree(state); } static const struct dvb_frontend_ops dtt200u_fe_ops; struct dvb_frontend* dtt200u_fe_attach(struct dvb_usb_device *d) { struct dtt200u_fe_state* state = NULL; /* allocate memory for the internal state */ state = kzalloc(sizeof(struct dtt200u_fe_state), GFP_KERNEL); if (state == NULL) goto error; deb_info("attaching frontend dtt200u\n"); state->d = d; mutex_init(&state->data_mutex); memcpy(&state->frontend.ops,&dtt200u_fe_ops,sizeof(struct dvb_frontend_ops)); state->frontend.demodulator_priv = state; return &state->frontend; error: return NULL; } static const struct dvb_frontend_ops dtt200u_fe_ops = { .delsys = { SYS_DVBT }, .info = { .name = "WideView USB DVB-T", .frequency_min_hz = 44250 * kHz, .frequency_max_hz = 867250 * kHz, .frequency_stepsize_hz = 250 * kHz, .caps = FE_CAN_INVERSION_AUTO | FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 | FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO | FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_QAM_64 | FE_CAN_QAM_AUTO | FE_CAN_TRANSMISSION_MODE_AUTO | FE_CAN_GUARD_INTERVAL_AUTO | FE_CAN_RECOVER | FE_CAN_HIERARCHY_AUTO, }, .release = dtt200u_fe_release, .init = dtt200u_fe_init, .sleep = dtt200u_fe_sleep, .set_frontend = dtt200u_fe_set_frontend, .get_frontend = dtt200u_fe_get_frontend, .get_tune_settings = dtt200u_fe_get_tune_settings, .read_status = dtt200u_fe_read_status, .read_ber = dtt200u_fe_read_ber, .read_signal_strength = dtt200u_fe_read_signal_strength, .read_snr = dtt200u_fe_read_snr, .read_ucblocks = dtt200u_fe_read_unc_blocks, };
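dtt200u_fe_set_frontend() above encodes the bandwidth as a single byte (8, 7 or 6 MHz) and the frequency in units of 250 kHz, low byte first, following the SET_RF_FREQ opcode. A small standalone check (the 474 MHz value is only an illustration) makes the arithmetic concrete:

/* Worked example of the frequency encoding used with SET_RF_FREQ above. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t frequency_hz = 474000000;	/* illustrative UHF frequency */
	uint16_t freq = frequency_hz / 250000;	/* 1896 == 0x0768, i.e. 250 kHz steps */
	uint8_t payload[2] = { freq & 0xff, (freq >> 8) & 0xff };

	/* These are the two bytes written after the SET_RF_FREQ opcode. */
	assert(payload[0] == 0x68 && payload[1] == 0x07);
	return 0;
}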
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the UDP module. * * Version: @(#)udp.h 1.0.2 05/07/93 * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * * Fixes: * Alan Cox : Turned on udp checksums. I don't want to * chase 'memory corruption' bugs that aren't!
*/ #ifndef _UDP_H #define _UDP_H #include <linux/list.h> #include <linux/bug.h> #include <net/inet_sock.h> #include <net/gso.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ip.h> #include <linux/ipv6.h> #include <linux/seq_file.h> #include <linux/poll.h> #include <linux/indirect_call_wrapper.h> /** * struct udp_skb_cb - UDP(-Lite) private variables * * @header: private variables used by IPv4/IPv6 * @cscov: checksum coverage length (UDP-Lite only) * @partial_cov: if set indicates partial csum coverage */ struct udp_skb_cb { union { struct inet_skb_parm h4; #if IS_ENABLED(CONFIG_IPV6) struct inet6_skb_parm h6; #endif } header; __u16 cscov; __u8 partial_cov; }; #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) /** * struct udp_hslot - UDP hash slot used by udp_table.hash/hash4 * * @head: head of list of sockets * @nulls_head: head of list of sockets, only used by hash4 * @count: number of sockets in 'head' list * @lock: spinlock protecting changes to head/count */ struct udp_hslot { union { struct hlist_head head; /* hash4 uses hlist_nulls to avoid moving wrongly onto another * hlist, because rehash() can happen with lookup(). */ struct hlist_nulls_head nulls_head; }; int count; spinlock_t lock; } __aligned(2 * sizeof(long)); /** * struct udp_hslot_main - UDP hash slot used by udp_table.hash2 * * @hslot: basic hash slot * @hash4_cnt: number of sockets in hslot4 of the same * (local port, local address) */ struct udp_hslot_main { struct udp_hslot hslot; /* must be the first member */ #if !IS_ENABLED(CONFIG_BASE_SMALL) u32 hash4_cnt; #endif } __aligned(2 * sizeof(long)); #define UDP_HSLOT_MAIN(__hslot) ((struct udp_hslot_main *)(__hslot)) /** * struct udp_table - UDP table * * @hash: hash table, sockets are hashed on (local port) * @hash2: hash table, sockets are hashed on (local port, local address) * @hash4: hash table, connected sockets are hashed on * (local port, local address, remote port, remote address) * @mask: number of slots in hash tables, minus 1 * @log: log2(number of slots in hash table) */ struct udp_table { struct udp_hslot *hash; struct udp_hslot_main *hash2; #if !IS_ENABLED(CONFIG_BASE_SMALL) struct udp_hslot *hash4; #endif unsigned int mask; unsigned int log; }; extern struct udp_table udp_table; void udp_table_init(struct udp_table *, const char *); static inline struct udp_hslot *udp_hashslot(struct udp_table *table, const struct net *net, unsigned int num) { return &table->hash[udp_hashfn(net, num, table->mask)]; } /* * For secondary hash, net_hash_mix() is performed before calling * udp_hashslot2(), this explains difference with udp_hashslot() */ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table, unsigned int hash) { return &table->hash2[hash & table->mask].hslot; } #if IS_ENABLED(CONFIG_BASE_SMALL) static inline void udp_table_hash4_init(struct udp_table *table) { } static inline struct udp_hslot *udp_hashslot4(struct udp_table *table, unsigned int hash) { BUILD_BUG(); return NULL; } static inline bool udp_hashed4(const struct sock *sk) { return false; } static inline unsigned int udp_hash4_slot_size(void) { return 0; } static inline bool udp_has_hash4(const struct udp_hslot *hslot2) { return false; } static inline void udp_hash4_inc(struct udp_hslot *hslot2) { } static inline void udp_hash4_dec(struct udp_hslot *hslot2) { } #else /* !CONFIG_BASE_SMALL */ /* Must be called with table->hash2 initialized */ static inline void udp_table_hash4_init(struct udp_table *table) { table->hash4 = (void *)(table->hash2 + (table->mask + 
1)); for (int i = 0; i <= table->mask; i++) { table->hash2[i].hash4_cnt = 0; INIT_HLIST_NULLS_HEAD(&table->hash4[i].nulls_head, i); table->hash4[i].count = 0; spin_lock_init(&table->hash4[i].lock); } } static inline struct udp_hslot *udp_hashslot4(struct udp_table *table, unsigned int hash) { return &table->hash4[hash & table->mask]; } static inline bool udp_hashed4(const struct sock *sk) { return !hlist_nulls_unhashed(&udp_sk(sk)->udp_lrpa_node); } static inline unsigned int udp_hash4_slot_size(void) { return sizeof(struct udp_hslot); } static inline bool udp_has_hash4(const struct udp_hslot *hslot2) { return UDP_HSLOT_MAIN(hslot2)->hash4_cnt; } static inline void udp_hash4_inc(struct udp_hslot *hslot2) { UDP_HSLOT_MAIN(hslot2)->hash4_cnt++; } static inline void udp_hash4_dec(struct udp_hslot *hslot2) { UDP_HSLOT_MAIN(hslot2)->hash4_cnt--; } #endif /* CONFIG_BASE_SMALL */ extern struct proto udp_prot; extern atomic_long_t udp_memory_allocated; DECLARE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); /* sysctl variables for udp */ extern long sysctl_udp_mem[3]; extern int sysctl_udp_rmem_min; extern int sysctl_udp_wmem_min; struct sk_buff; /* * Generic checksumming routines for UDP(-Lite) v4 and v6 */ static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb) { return (UDP_SKB_CB(skb)->cscov == skb->len ? __skb_checksum_complete(skb) : __skb_checksum_complete_head(skb, UDP_SKB_CB(skb)->cscov)); } static inline int udp_lib_checksum_complete(struct sk_buff *skb) { return !skb_csum_unnecessary(skb) && __udp_lib_checksum_complete(skb); } /** * udp_csum_outgoing - compute UDPv4/v6 checksum over fragments * @sk: socket we are writing to * @skb: sk_buff containing the filled-in UDP header * (checksum field must be zeroed out) */ static inline __wsum udp_csum_outgoing(struct sock *sk, struct sk_buff *skb) { __wsum csum = csum_partial(skb_transport_header(skb), sizeof(struct udphdr), 0); skb_queue_walk(&sk->sk_write_queue, skb) { csum = csum_add(csum, skb->csum); } return csum; } static inline __wsum udp_csum(struct sk_buff *skb) { __wsum csum = csum_partial(skb_transport_header(skb), sizeof(struct udphdr), skb->csum); for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) { csum = csum_add(csum, skb->csum); } return csum; } static inline __sum16 udp_v4_check(int len, __be32 saddr, __be32 daddr, __wsum base) { return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base); } void udp_set_csum(bool nocheck, struct sk_buff *skb, __be32 saddr, __be32 daddr, int len); static inline void udp_csum_pull_header(struct sk_buff *skb) { if (!skb->csum_valid && skb->ip_summed == CHECKSUM_NONE) skb->csum = csum_partial(skb->data, sizeof(struct udphdr), skb->csum); skb_pull_rcsum(skb, sizeof(struct udphdr)); UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr); } typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, __be16 dport); void udp_v6_early_demux(struct sk_buff *skb); INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features, bool is_ipv6); static inline void udp_lib_init_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); skb_queue_head_init(&up->reader_queue); INIT_HLIST_NODE(&up->tunnel_list); up->forward_threshold = sk->sk_rcvbuf >> 2; set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); } /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ static inline int udp_lib_hash(struct sock *sk) { BUG(); return 0; } void udp_lib_unhash(struct sock *sk); void 
udp_lib_rehash(struct sock *sk, u16 new_hash, u16 new_hash4); u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport); static inline void udp_lib_close(struct sock *sk, long timeout) { sk_common_release(sk); } /* hash4 routines shared between UDPv4/6 */ #if IS_ENABLED(CONFIG_BASE_SMALL) static inline void udp_lib_hash4(struct sock *sk, u16 hash) { } static inline void udp4_hash4(struct sock *sk) { } #else /* !CONFIG_BASE_SMALL */ void udp_lib_hash4(struct sock *sk, u16 hash); void udp4_hash4(struct sock *sk); #endif /* CONFIG_BASE_SMALL */ int udp_lib_get_port(struct sock *sk, unsigned short snum, unsigned int hash2_nulladdr); u32 udp_flow_hashrnd(void); static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, int min, int max, bool use_eth) { u32 hash; if (min >= max) { /* Use default range */ inet_get_local_port_range(net, &min, &max); } hash = skb_get_hash(skb); if (unlikely(!hash)) { if (use_eth) { /* Can't find a normal hash, caller has indicated an * Ethernet packet so use that to compute a hash. */ hash = jhash(skb->data, 2 * ETH_ALEN, (__force u32) skb->protocol); } else { /* Can't derive any sort of hash for the packet, set * to some consistent random value. */ hash = udp_flow_hashrnd(); } } /* Since this is being sent on the wire obfuscate hash a bit * to minimize possibility that any useful information to an * attacker is leaked. Only upper 16 bits are relevant in the * computation for 16 bit port value. */ hash ^= hash << 16; return htons((((u64) hash * (max - min)) >> 32) + min); } static inline int udp_rqueue_get(struct sock *sk) { return sk_rmem_alloc_get(sk) - READ_ONCE(udp_sk(sk)->forward_deficit); } static inline bool udp_sk_bound_dev_eq(const struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); #endif } /* net/ipv4/udp.c */ void udp_destruct_common(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off, int *err); static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, int *err) { int off = 0; return __skb_recv_udp(sk, flags, &off, err); } int udp_v4_early_demux(struct sk_buff *skb); bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_err(struct sk_buff *, u32); int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); void udp_splice_eof(struct socket *sock); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, int *karg); int udp_init_sock(struct sock *sk); int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, bool is_ipv6); int 
udp_lib_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int udp_lib_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen, int (*push_pending_frames)(struct sock *)); struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif); struct sock *__udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); /* UDP uses skb->dev_scratch to cache as much information as possible and avoid * possibly multiple cache miss on dequeue() */ struct udp_dev_scratch { /* skb->truesize and the stateless bit are embedded in a single field; * do not use a bitfield since the compiler emits better/smaller code * this way */ u32 _tsize_state; #if BITS_PER_LONG == 64 /* len and the bit needed to compute skb_csum_unnecessary * will be on cold cache lines at recvmsg time. * skb->len can be stored on 16 bits since the udp header has been * already validated and pulled. */ u16 len; bool is_linear; bool csum_unnecessary; #endif }; static inline struct udp_dev_scratch *udp_skb_scratch(struct sk_buff *skb) { return (struct udp_dev_scratch *)&skb->dev_scratch; } #if BITS_PER_LONG == 64 static inline unsigned int udp_skb_len(struct sk_buff *skb) { return udp_skb_scratch(skb)->len; } static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) { return udp_skb_scratch(skb)->csum_unnecessary; } static inline bool udp_skb_is_linear(struct sk_buff *skb) { return udp_skb_scratch(skb)->is_linear; } #else static inline unsigned int udp_skb_len(struct sk_buff *skb) { return skb->len; } static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) { return skb_csum_unnecessary(skb); } static inline bool udp_skb_is_linear(struct sk_buff *skb) { return !skb_is_nonlinear(skb); } #endif static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, struct iov_iter *to) { return copy_to_iter_full(skb->data + off, len, to) ? 
0 : -EFAULT; } /* * SNMP statistics for UDP and UDP-Lite */ #define UDP_INC_STATS(net, field, is_udplite) do { \ if (is_udplite) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ else SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) #define __UDP_INC_STATS(net, field, is_udplite) do { \ if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ else __SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) #define __UDP6_INC_STATS(net, field, is_udplite) do { \ if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_stats_in6, field);\ else __SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ } while(0) #define UDP6_INC_STATS(net, field, __lite) do { \ if (__lite) SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \ else SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ } while(0) #if IS_ENABLED(CONFIG_IPV6) #define __UDPX_MIB(sk, ipv4) \ ({ \ ipv4 ? (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ sock_net(sk)->mib.udp_statistics) : \ (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_stats_in6 : \ sock_net(sk)->mib.udp_stats_in6); \ }) #else #define __UDPX_MIB(sk, ipv4) \ ({ \ IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ sock_net(sk)->mib.udp_statistics; \ }) #endif #define __UDPX_INC_STATS(sk, field) \ __SNMP_INC_STATS(__UDPX_MIB(sk, (sk)->sk_family == AF_INET), field) #ifdef CONFIG_PROC_FS struct udp_seq_afinfo { sa_family_t family; struct udp_table *udp_table; }; struct udp_iter_state { struct seq_net_private p; int bucket; }; void *udp_seq_start(struct seq_file *seq, loff_t *pos); void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos); void udp_seq_stop(struct seq_file *seq, void *v); extern const struct seq_operations udp_seq_ops; extern const struct seq_operations udp6_seq_ops; int udp4_proc_init(void); void udp4_proc_exit(void); #endif /* CONFIG_PROC_FS */ int udpv4_offload_init(void); void udp_init(void); DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); void udp_encap_enable(void); void udp_encap_disable(void); #if IS_ENABLED(CONFIG_IPV6) DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void); #endif static inline struct sk_buff *udp_rcv_segment(struct sock *sk, struct sk_buff *skb, bool ipv4) { netdev_features_t features = NETIF_F_SG; struct sk_buff *segs; /* Avoid csum recalculation by skb_segment unless userspace explicitly * asks for the final checksum values */ if (!inet_get_convert_csum(sk)) features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; /* UDP segmentation expects packets of type CHECKSUM_PARTIAL or * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial * packets in udp_gro_complete_segment. As does UDP GSO, verified by * udp_send_skb. But when those packets are looped in dev_loopback_xmit * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY. * Reset in this specific case, where PARTIAL is both correct and * required. 
*/ if (skb->pkt_type == PACKET_LOOPBACK) skb->ip_summed = CHECKSUM_PARTIAL; /* the GSO CB lays after the UDP one, no need to save and restore any * CB fragment */ segs = __skb_gso_segment(skb, features, false); if (IS_ERR_OR_NULL(segs)) { int segs_nr = skb_shinfo(skb)->gso_segs; atomic_add(segs_nr, &sk->sk_drops); SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr); kfree_skb(skb); return NULL; } consume_skb(skb); return segs; } static inline void udp_post_segment_fix_csum(struct sk_buff *skb) { /* UDP-lite can't land here - no GRO */ WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov); /* UDP packets generated with UDP_SEGMENT and traversing: * * UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx) * * can reach an UDP socket with CHECKSUM_NONE, because * __iptunnel_pull_header() converts CHECKSUM_PARTIAL into NONE. * SKB_GSO_UDP_L4 or SKB_GSO_FRAGLIST packets with no UDP tunnel will * have a valid checksum, as the GRO engine validates the UDP csum * before the aggregation and nobody strips such info in between. * Instead of adding another check in the tunnel fastpath, we can force * a valid csum after the segmentation. * Additionally fixup the UDP CB. */ UDP_SKB_CB(skb)->cscov = skb->len; if (skb->ip_summed == CHECKSUM_NONE && !skb->csum_valid) skb->csum_valid = 1; } #ifdef CONFIG_BPF_SYSCALL struct sk_psock; int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); #endif #endif /* _UDP_H */
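/*
 * Illustrative userspace model (not part of udp.h) of the range mapping done
 * by udp_flow_src_port() above: the 32-bit flow hash is folded with
 * "hash ^= hash << 16" so both halves contribute to the final 16-bit value,
 * and the 32.32 fixed-point multiply "((u64)hash * (max - min)) >> 32" scales
 * it uniformly into [min, max).  The hash value and port range used here are
 * hypothetical; the byte-order conversion (htons) is omitted.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t model_flow_src_port(uint32_t hash, uint32_t min, uint32_t max)
{
	hash ^= hash << 16;	/* mix the low bits into the upper half */
	return (uint16_t)((((uint64_t)hash * (max - min)) >> 32) + min);
}

int main(void)
{
	/* a typical ephemeral port range, half-open: [32768, 61000) */
	printf("src port: %u\n", model_flow_src_port(0x12345678u, 32768, 61000));
	return 0;
}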
// SPDX-License-Identifier: GPL-2.0-only /* * v4l2-dv-timings - dv-timings helper functions * * Copyright 2013 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
*/ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/rational.h> #include <linux/videodev2.h> #include <linux/v4l2-dv-timings.h> #include <media/v4l2-dv-timings.h> #include <linux/math64.h> #include <linux/hdmi.h> #include <media/cec.h> MODULE_AUTHOR("Hans Verkuil"); MODULE_DESCRIPTION("V4L2 DV Timings Helper Functions"); MODULE_LICENSE("GPL"); const struct v4l2_dv_timings v4l2_dv_timings_presets[] = { V4L2_DV_BT_CEA_640X480P59_94, V4L2_DV_BT_CEA_720X480I59_94, V4L2_DV_BT_CEA_720X480P59_94, V4L2_DV_BT_CEA_720X576I50, V4L2_DV_BT_CEA_720X576P50, V4L2_DV_BT_CEA_1280X720P24, V4L2_DV_BT_CEA_1280X720P25, V4L2_DV_BT_CEA_1280X720P30, V4L2_DV_BT_CEA_1280X720P50, V4L2_DV_BT_CEA_1280X720P60, V4L2_DV_BT_CEA_1920X1080P24, V4L2_DV_BT_CEA_1920X1080P25, V4L2_DV_BT_CEA_1920X1080P30, V4L2_DV_BT_CEA_1920X1080I50, V4L2_DV_BT_CEA_1920X1080P50, V4L2_DV_BT_CEA_1920X1080I60, V4L2_DV_BT_CEA_1920X1080P60, V4L2_DV_BT_DMT_640X350P85, V4L2_DV_BT_DMT_640X400P85, V4L2_DV_BT_DMT_720X400P85, V4L2_DV_BT_DMT_640X480P72, V4L2_DV_BT_DMT_640X480P75, V4L2_DV_BT_DMT_640X480P85, V4L2_DV_BT_DMT_800X600P56, V4L2_DV_BT_DMT_800X600P60, V4L2_DV_BT_DMT_800X600P72, V4L2_DV_BT_DMT_800X600P75, V4L2_DV_BT_DMT_800X600P85, V4L2_DV_BT_DMT_800X600P120_RB, V4L2_DV_BT_DMT_848X480P60, V4L2_DV_BT_DMT_1024X768I43, V4L2_DV_BT_DMT_1024X768P60, V4L2_DV_BT_DMT_1024X768P70, V4L2_DV_BT_DMT_1024X768P75, V4L2_DV_BT_DMT_1024X768P85, V4L2_DV_BT_DMT_1024X768P120_RB, V4L2_DV_BT_DMT_1152X864P75, V4L2_DV_BT_DMT_1280X768P60_RB, V4L2_DV_BT_DMT_1280X768P60, V4L2_DV_BT_DMT_1280X768P75, V4L2_DV_BT_DMT_1280X768P85, V4L2_DV_BT_DMT_1280X768P120_RB, V4L2_DV_BT_DMT_1280X800P60_RB, V4L2_DV_BT_DMT_1280X800P60, V4L2_DV_BT_DMT_1280X800P75, V4L2_DV_BT_DMT_1280X800P85, V4L2_DV_BT_DMT_1280X800P120_RB, V4L2_DV_BT_DMT_1280X960P60, V4L2_DV_BT_DMT_1280X960P85, V4L2_DV_BT_DMT_1280X960P120_RB, V4L2_DV_BT_DMT_1280X1024P60, V4L2_DV_BT_DMT_1280X1024P75, V4L2_DV_BT_DMT_1280X1024P85, V4L2_DV_BT_DMT_1280X1024P120_RB, V4L2_DV_BT_DMT_1360X768P60, V4L2_DV_BT_DMT_1360X768P120_RB, V4L2_DV_BT_DMT_1366X768P60, V4L2_DV_BT_DMT_1366X768P60_RB, V4L2_DV_BT_DMT_1400X1050P60_RB, V4L2_DV_BT_DMT_1400X1050P60, V4L2_DV_BT_DMT_1400X1050P75, V4L2_DV_BT_DMT_1400X1050P85, V4L2_DV_BT_DMT_1400X1050P120_RB, V4L2_DV_BT_DMT_1440X900P60_RB, V4L2_DV_BT_DMT_1440X900P60, V4L2_DV_BT_DMT_1440X900P75, V4L2_DV_BT_DMT_1440X900P85, V4L2_DV_BT_DMT_1440X900P120_RB, V4L2_DV_BT_DMT_1600X900P60_RB, V4L2_DV_BT_DMT_1600X1200P60, V4L2_DV_BT_DMT_1600X1200P65, V4L2_DV_BT_DMT_1600X1200P70, V4L2_DV_BT_DMT_1600X1200P75, V4L2_DV_BT_DMT_1600X1200P85, V4L2_DV_BT_DMT_1600X1200P120_RB, V4L2_DV_BT_DMT_1680X1050P60_RB, V4L2_DV_BT_DMT_1680X1050P60, V4L2_DV_BT_DMT_1680X1050P75, V4L2_DV_BT_DMT_1680X1050P85, V4L2_DV_BT_DMT_1680X1050P120_RB, V4L2_DV_BT_DMT_1792X1344P60, V4L2_DV_BT_DMT_1792X1344P75, V4L2_DV_BT_DMT_1792X1344P120_RB, V4L2_DV_BT_DMT_1856X1392P60, V4L2_DV_BT_DMT_1856X1392P75, V4L2_DV_BT_DMT_1856X1392P120_RB, V4L2_DV_BT_DMT_1920X1200P60_RB, V4L2_DV_BT_DMT_1920X1200P60, V4L2_DV_BT_DMT_1920X1200P75, V4L2_DV_BT_DMT_1920X1200P85, V4L2_DV_BT_DMT_1920X1200P120_RB, V4L2_DV_BT_DMT_1920X1440P60, V4L2_DV_BT_DMT_1920X1440P75, V4L2_DV_BT_DMT_1920X1440P120_RB, V4L2_DV_BT_DMT_2048X1152P60_RB, V4L2_DV_BT_DMT_2560X1600P60_RB, V4L2_DV_BT_DMT_2560X1600P60, V4L2_DV_BT_DMT_2560X1600P75, V4L2_DV_BT_DMT_2560X1600P85, V4L2_DV_BT_DMT_2560X1600P120_RB, V4L2_DV_BT_CEA_3840X2160P24, V4L2_DV_BT_CEA_3840X2160P25, V4L2_DV_BT_CEA_3840X2160P30, V4L2_DV_BT_CEA_3840X2160P50, V4L2_DV_BT_CEA_3840X2160P60, 
V4L2_DV_BT_CEA_4096X2160P24, V4L2_DV_BT_CEA_4096X2160P25, V4L2_DV_BT_CEA_4096X2160P30, V4L2_DV_BT_CEA_4096X2160P50, V4L2_DV_BT_DMT_4096X2160P59_94_RB, V4L2_DV_BT_CEA_4096X2160P60, { } }; EXPORT_SYMBOL_GPL(v4l2_dv_timings_presets); bool v4l2_valid_dv_timings(const struct v4l2_dv_timings *t, const struct v4l2_dv_timings_cap *dvcap, v4l2_check_dv_timings_fnc fnc, void *fnc_handle) { const struct v4l2_bt_timings *bt = &t->bt; const struct v4l2_bt_timings_cap *cap = &dvcap->bt; u32 caps = cap->capabilities; const u32 max_vert = 10240; u32 max_hor = 3 * bt->width; if (t->type != V4L2_DV_BT_656_1120) return false; if (t->type != dvcap->type || bt->height < cap->min_height || bt->height > cap->max_height || bt->width < cap->min_width || bt->width > cap->max_width || bt->pixelclock < cap->min_pixelclock || bt->pixelclock > cap->max_pixelclock || (!(caps & V4L2_DV_BT_CAP_CUSTOM) && cap->standards && bt->standards && !(bt->standards & cap->standards)) || (bt->interlaced && !(caps & V4L2_DV_BT_CAP_INTERLACED)) || (!bt->interlaced && !(caps & V4L2_DV_BT_CAP_PROGRESSIVE))) return false; /* sanity checks for the blanking timings */ if (!bt->interlaced && (bt->il_vbackporch || bt->il_vsync || bt->il_vfrontporch)) return false; /* * Some video receivers cannot properly separate the frontporch, * backporch and sync values, and instead they only have the total * blanking. That can be assigned to any of these three fields. * So just check that none of these are way out of range. */ if (bt->hfrontporch > max_hor || bt->hsync > max_hor || bt->hbackporch > max_hor) return false; if (bt->vfrontporch > max_vert || bt->vsync > max_vert || bt->vbackporch > max_vert) return false; if (bt->interlaced && (bt->il_vfrontporch > max_vert || bt->il_vsync > max_vert || bt->il_vbackporch > max_vert)) return false; return fnc == NULL || fnc(t, fnc_handle); } EXPORT_SYMBOL_GPL(v4l2_valid_dv_timings); int v4l2_enum_dv_timings_cap(struct v4l2_enum_dv_timings *t, const struct v4l2_dv_timings_cap *cap, v4l2_check_dv_timings_fnc fnc, void *fnc_handle) { u32 i, idx; memset(t->reserved, 0, sizeof(t->reserved)); for (i = idx = 0; v4l2_dv_timings_presets[i].bt.width; i++) { if (v4l2_valid_dv_timings(v4l2_dv_timings_presets + i, cap, fnc, fnc_handle) && idx++ == t->index) { t->timings = v4l2_dv_timings_presets[i]; return 0; } } return -EINVAL; } EXPORT_SYMBOL_GPL(v4l2_enum_dv_timings_cap); bool v4l2_find_dv_timings_cap(struct v4l2_dv_timings *t, const struct v4l2_dv_timings_cap *cap, unsigned pclock_delta, v4l2_check_dv_timings_fnc fnc, void *fnc_handle) { int i; if (!v4l2_valid_dv_timings(t, cap, fnc, fnc_handle)) return false; for (i = 0; v4l2_dv_timings_presets[i].bt.width; i++) { if (v4l2_valid_dv_timings(v4l2_dv_timings_presets + i, cap, fnc, fnc_handle) && v4l2_match_dv_timings(t, v4l2_dv_timings_presets + i, pclock_delta, false)) { u32 flags = t->bt.flags & V4L2_DV_FL_REDUCED_FPS; *t = v4l2_dv_timings_presets[i]; if (can_reduce_fps(&t->bt)) t->bt.flags |= flags; return true; } } return false; } EXPORT_SYMBOL_GPL(v4l2_find_dv_timings_cap); bool v4l2_find_dv_timings_cea861_vic(struct v4l2_dv_timings *t, u8 vic) { unsigned int i; for (i = 0; v4l2_dv_timings_presets[i].bt.width; i++) { const struct v4l2_bt_timings *bt = &v4l2_dv_timings_presets[i].bt; if ((bt->flags & V4L2_DV_FL_HAS_CEA861_VIC) && bt->cea861_vic == vic) { *t = v4l2_dv_timings_presets[i]; return true; } } return false; } EXPORT_SYMBOL_GPL(v4l2_find_dv_timings_cea861_vic); /** * v4l2_match_dv_timings - check if two timings match * @t1: compare this v4l2_dv_timings 
struct... * @t2: with this struct. * @pclock_delta: the allowed pixelclock deviation. * @match_reduced_fps: if true, then fail if V4L2_DV_FL_REDUCED_FPS does not * match. * * Compare t1 with t2 with a given margin of error for the pixelclock. */ bool v4l2_match_dv_timings(const struct v4l2_dv_timings *t1, const struct v4l2_dv_timings *t2, unsigned pclock_delta, bool match_reduced_fps) { if (t1->type != t2->type || t1->type != V4L2_DV_BT_656_1120) return false; if (t1->bt.width == t2->bt.width && t1->bt.height == t2->bt.height && t1->bt.interlaced == t2->bt.interlaced && t1->bt.polarities == t2->bt.polarities && t1->bt.pixelclock >= t2->bt.pixelclock - pclock_delta && t1->bt.pixelclock <= t2->bt.pixelclock + pclock_delta && t1->bt.hfrontporch == t2->bt.hfrontporch && t1->bt.hsync == t2->bt.hsync && t1->bt.hbackporch == t2->bt.hbackporch && t1->bt.vfrontporch == t2->bt.vfrontporch && t1->bt.vsync == t2->bt.vsync && t1->bt.vbackporch == t2->bt.vbackporch && (!match_reduced_fps || (t1->bt.flags & V4L2_DV_FL_REDUCED_FPS) == (t2->bt.flags & V4L2_DV_FL_REDUCED_FPS)) && (!t1->bt.interlaced || (t1->bt.il_vfrontporch == t2->bt.il_vfrontporch && t1->bt.il_vsync == t2->bt.il_vsync && t1->bt.il_vbackporch == t2->bt.il_vbackporch))) return true; return false; } EXPORT_SYMBOL_GPL(v4l2_match_dv_timings); void v4l2_print_dv_timings(const char *dev_prefix, const char *prefix, const struct v4l2_dv_timings *t, bool detailed) { const struct v4l2_bt_timings *bt = &t->bt; u32 htot, vtot; u32 fps; if (t->type != V4L2_DV_BT_656_1120) return; htot = V4L2_DV_BT_FRAME_WIDTH(bt); vtot = V4L2_DV_BT_FRAME_HEIGHT(bt); if (bt->interlaced) vtot /= 2; fps = (htot * vtot) > 0 ? div_u64((100 * (u64)bt->pixelclock), (htot * vtot)) : 0; if (prefix == NULL) prefix = ""; pr_info("%s: %s%ux%u%s%u.%02u (%ux%u)\n", dev_prefix, prefix, bt->width, bt->height, bt->interlaced ? "i" : "p", fps / 100, fps % 100, htot, vtot); if (!detailed) return; pr_info("%s: horizontal: fp = %u, %ssync = %u, bp = %u\n", dev_prefix, bt->hfrontporch, (bt->polarities & V4L2_DV_HSYNC_POS_POL) ? "+" : "-", bt->hsync, bt->hbackporch); pr_info("%s: vertical: fp = %u, %ssync = %u, bp = %u\n", dev_prefix, bt->vfrontporch, (bt->polarities & V4L2_DV_VSYNC_POS_POL) ? "+" : "-", bt->vsync, bt->vbackporch); if (bt->interlaced) pr_info("%s: vertical bottom field: fp = %u, %ssync = %u, bp = %u\n", dev_prefix, bt->il_vfrontporch, (bt->polarities & V4L2_DV_VSYNC_POS_POL) ? "+" : "-", bt->il_vsync, bt->il_vbackporch); pr_info("%s: pixelclock: %llu\n", dev_prefix, bt->pixelclock); pr_info("%s: flags (0x%x):%s%s%s%s%s%s%s%s%s%s\n", dev_prefix, bt->flags, (bt->flags & V4L2_DV_FL_REDUCED_BLANKING) ? " REDUCED_BLANKING" : "", ((bt->flags & V4L2_DV_FL_REDUCED_BLANKING) && bt->vsync == 8) ? " (V2)" : "", (bt->flags & V4L2_DV_FL_CAN_REDUCE_FPS) ? " CAN_REDUCE_FPS" : "", (bt->flags & V4L2_DV_FL_REDUCED_FPS) ? " REDUCED_FPS" : "", (bt->flags & V4L2_DV_FL_HALF_LINE) ? " HALF_LINE" : "", (bt->flags & V4L2_DV_FL_IS_CE_VIDEO) ? " CE_VIDEO" : "", (bt->flags & V4L2_DV_FL_FIRST_FIELD_EXTRA_LINE) ? " FIRST_FIELD_EXTRA_LINE" : "", (bt->flags & V4L2_DV_FL_HAS_PICTURE_ASPECT) ? " HAS_PICTURE_ASPECT" : "", (bt->flags & V4L2_DV_FL_HAS_CEA861_VIC) ? " HAS_CEA861_VIC" : "", (bt->flags & V4L2_DV_FL_HAS_HDMI_VIC) ? " HAS_HDMI_VIC" : ""); pr_info("%s: standards (0x%x):%s%s%s%s%s\n", dev_prefix, bt->standards, (bt->standards & V4L2_DV_BT_STD_CEA861) ? " CEA" : "", (bt->standards & V4L2_DV_BT_STD_DMT) ? " DMT" : "", (bt->standards & V4L2_DV_BT_STD_CVT) ? 
" CVT" : "", (bt->standards & V4L2_DV_BT_STD_GTF) ? " GTF" : "", (bt->standards & V4L2_DV_BT_STD_SDI) ? " SDI" : ""); if (bt->flags & V4L2_DV_FL_HAS_PICTURE_ASPECT) pr_info("%s: picture aspect (hor:vert): %u:%u\n", dev_prefix, bt->picture_aspect.numerator, bt->picture_aspect.denominator); if (bt->flags & V4L2_DV_FL_HAS_CEA861_VIC) pr_info("%s: CEA-861 VIC: %u\n", dev_prefix, bt->cea861_vic); if (bt->flags & V4L2_DV_FL_HAS_HDMI_VIC) pr_info("%s: HDMI VIC: %u\n", dev_prefix, bt->hdmi_vic); } EXPORT_SYMBOL_GPL(v4l2_print_dv_timings); struct v4l2_fract v4l2_dv_timings_aspect_ratio(const struct v4l2_dv_timings *t) { struct v4l2_fract ratio = { 1, 1 }; unsigned long n, d; if (t->type != V4L2_DV_BT_656_1120) return ratio; if (!(t->bt.flags & V4L2_DV_FL_HAS_PICTURE_ASPECT)) return ratio; ratio.numerator = t->bt.width * t->bt.picture_aspect.denominator; ratio.denominator = t->bt.height * t->bt.picture_aspect.numerator; rational_best_approximation(ratio.numerator, ratio.denominator, ratio.numerator, ratio.denominator, &n, &d); ratio.numerator = n; ratio.denominator = d; return ratio; } EXPORT_SYMBOL_GPL(v4l2_dv_timings_aspect_ratio); /** v4l2_calc_timeperframe - helper function to calculate timeperframe based * v4l2_dv_timings fields. * @t - Timings for the video mode. * * Calculates the expected timeperframe using the pixel clock value and * horizontal/vertical measures. This means that v4l2_dv_timings structure * must be correctly and fully filled. */ struct v4l2_fract v4l2_calc_timeperframe(const struct v4l2_dv_timings *t) { const struct v4l2_bt_timings *bt = &t->bt; struct v4l2_fract fps_fract = { 1, 1 }; unsigned long n, d; u32 htot, vtot, fps; u64 pclk; if (t->type != V4L2_DV_BT_656_1120) return fps_fract; htot = V4L2_DV_BT_FRAME_WIDTH(bt); vtot = V4L2_DV_BT_FRAME_HEIGHT(bt); pclk = bt->pixelclock; if ((bt->flags & V4L2_DV_FL_CAN_DETECT_REDUCED_FPS) && (bt->flags & V4L2_DV_FL_REDUCED_FPS)) pclk = div_u64(pclk * 1000ULL, 1001); fps = (htot * vtot) > 0 ? 
div_u64((100 * pclk), (htot * vtot)) : 0; if (!fps) return fps_fract; rational_best_approximation(fps, 100, fps, 100, &n, &d); fps_fract.numerator = d; fps_fract.denominator = n; return fps_fract; } EXPORT_SYMBOL_GPL(v4l2_calc_timeperframe); /* * CVT defines * Based on Coordinated Video Timings Standard * version 1.1 September 10, 2003 */ #define CVT_PXL_CLK_GRAN 250000 /* pixel clock granularity */ #define CVT_PXL_CLK_GRAN_RB_V2 1000 /* granularity for reduced blanking v2*/ /* Normal blanking */ #define CVT_MIN_V_BPORCH 7 /* lines */ #define CVT_MIN_V_PORCH_RND 3 /* lines */ #define CVT_MIN_VSYNC_BP 550 /* min time of vsync + back porch (us) */ #define CVT_HSYNC_PERCENT 8 /* nominal hsync as percentage of line */ /* Normal blanking for CVT uses GTF to calculate horizontal blanking */ #define CVT_CELL_GRAN 8 /* character cell granularity */ #define CVT_M 600 /* blanking formula gradient */ #define CVT_C 40 /* blanking formula offset */ #define CVT_K 128 /* blanking formula scaling factor */ #define CVT_J 20 /* blanking formula scaling factor */ #define CVT_C_PRIME (((CVT_C - CVT_J) * CVT_K / 256) + CVT_J) #define CVT_M_PRIME (CVT_K * CVT_M / 256) /* Reduced Blanking */ #define CVT_RB_MIN_V_BPORCH 7 /* lines */ #define CVT_RB_V_FPORCH 3 /* lines */ #define CVT_RB_MIN_V_BLANK 460 /* us */ #define CVT_RB_H_SYNC 32 /* pixels */ #define CVT_RB_H_BLANK 160 /* pixels */ /* Reduce blanking Version 2 */ #define CVT_RB_V2_H_BLANK 80 /* pixels */ #define CVT_RB_MIN_V_FPORCH 3 /* lines */ #define CVT_RB_V2_MIN_V_FPORCH 1 /* lines */ #define CVT_RB_V_BPORCH 6 /* lines */ /** v4l2_detect_cvt - detect if the given timings follow the CVT standard * @frame_height - the total height of the frame (including blanking) in lines. * @hfreq - the horizontal frequency in Hz. * @vsync - the height of the vertical sync in lines. * @active_width - active width of image (does not include blanking). This * information is needed only in case of version 2 of reduced blanking. * In other cases, this parameter does not have any effect on timings. * @polarities - the horizontal and vertical polarities (same as struct * v4l2_bt_timings polarities). * @interlaced - if this flag is true, it indicates interlaced format * @cap - the v4l2_dv_timings_cap capabilities. * @timings - the resulting timings. * * This function will attempt to detect if the given values correspond to a * valid CVT format. If so, then it will return true, and fmt will be filled * in with the found CVT timings. 
*/ bool v4l2_detect_cvt(unsigned int frame_height, unsigned int hfreq, unsigned int vsync, unsigned int active_width, u32 polarities, bool interlaced, const struct v4l2_dv_timings_cap *cap, struct v4l2_dv_timings *timings) { struct v4l2_dv_timings t = {}; int v_fp, v_bp, h_fp, h_bp, hsync; int frame_width, image_height, image_width; bool reduced_blanking; bool rb_v2 = false; unsigned int pix_clk; if (vsync < 4 || vsync > 8) return false; if (polarities == V4L2_DV_VSYNC_POS_POL) reduced_blanking = false; else if (polarities == V4L2_DV_HSYNC_POS_POL) reduced_blanking = true; else return false; if (reduced_blanking && vsync == 8) rb_v2 = true; if (rb_v2 && active_width == 0) return false; if (!rb_v2 && vsync > 7) return false; if (hfreq == 0) return false; /* Vertical */ if (reduced_blanking) { if (rb_v2) { v_bp = CVT_RB_V_BPORCH; v_fp = (CVT_RB_MIN_V_BLANK * hfreq) / 1000000 + 1; v_fp -= vsync + v_bp; if (v_fp < CVT_RB_V2_MIN_V_FPORCH) v_fp = CVT_RB_V2_MIN_V_FPORCH; } else { v_fp = CVT_RB_V_FPORCH; v_bp = (CVT_RB_MIN_V_BLANK * hfreq) / 1000000 + 1; v_bp -= vsync + v_fp; if (v_bp < CVT_RB_MIN_V_BPORCH) v_bp = CVT_RB_MIN_V_BPORCH; } } else { v_fp = CVT_MIN_V_PORCH_RND; v_bp = (CVT_MIN_VSYNC_BP * hfreq) / 1000000 + 1 - vsync; if (v_bp < CVT_MIN_V_BPORCH) v_bp = CVT_MIN_V_BPORCH; } if (interlaced) image_height = (frame_height - 2 * v_fp - 2 * vsync - 2 * v_bp) & ~0x1; else image_height = (frame_height - v_fp - vsync - v_bp + 1) & ~0x1; if (image_height < 0) return false; /* Aspect ratio based on vsync */ switch (vsync) { case 4: image_width = (image_height * 4) / 3; break; case 5: image_width = (image_height * 16) / 9; break; case 6: image_width = (image_height * 16) / 10; break; case 7: /* special case */ if (image_height == 1024) image_width = (image_height * 5) / 4; else if (image_height == 768) image_width = (image_height * 15) / 9; else return false; break; case 8: image_width = active_width; break; default: return false; } if (!rb_v2) image_width = image_width & ~7; /* Horizontal */ if (reduced_blanking) { int h_blank; int clk_gran; h_blank = rb_v2 ? CVT_RB_V2_H_BLANK : CVT_RB_H_BLANK; clk_gran = rb_v2 ? 
CVT_PXL_CLK_GRAN_RB_V2 : CVT_PXL_CLK_GRAN; pix_clk = (image_width + h_blank) * hfreq; pix_clk = (pix_clk / clk_gran) * clk_gran; h_bp = h_blank / 2; hsync = CVT_RB_H_SYNC; h_fp = h_blank - h_bp - hsync; frame_width = image_width + h_blank; } else { unsigned ideal_duty_cycle_per_myriad = 100 * CVT_C_PRIME - (CVT_M_PRIME * 100000) / hfreq; int h_blank; if (ideal_duty_cycle_per_myriad < 2000) ideal_duty_cycle_per_myriad = 2000; h_blank = image_width * ideal_duty_cycle_per_myriad / (10000 - ideal_duty_cycle_per_myriad); h_blank = (h_blank / (2 * CVT_CELL_GRAN)) * 2 * CVT_CELL_GRAN; pix_clk = (image_width + h_blank) * hfreq; pix_clk = (pix_clk / CVT_PXL_CLK_GRAN) * CVT_PXL_CLK_GRAN; h_bp = h_blank / 2; frame_width = image_width + h_blank; hsync = frame_width * CVT_HSYNC_PERCENT / 100; hsync = (hsync / CVT_CELL_GRAN) * CVT_CELL_GRAN; h_fp = h_blank - hsync - h_bp; } t.type = V4L2_DV_BT_656_1120; t.bt.polarities = polarities; t.bt.width = image_width; t.bt.height = image_height; t.bt.hfrontporch = h_fp; t.bt.vfrontporch = v_fp; t.bt.hsync = hsync; t.bt.vsync = vsync; t.bt.hbackporch = frame_width - image_width - h_fp - hsync; if (!interlaced) { t.bt.vbackporch = frame_height - image_height - v_fp - vsync; t.bt.interlaced = V4L2_DV_PROGRESSIVE; } else { t.bt.vbackporch = (frame_height - image_height - 2 * v_fp - 2 * vsync) / 2; t.bt.il_vbackporch = frame_height - image_height - 2 * v_fp - 2 * vsync - t.bt.vbackporch; t.bt.il_vfrontporch = v_fp; t.bt.il_vsync = vsync; t.bt.flags |= V4L2_DV_FL_HALF_LINE; t.bt.interlaced = V4L2_DV_INTERLACED; } t.bt.pixelclock = pix_clk; t.bt.standards = V4L2_DV_BT_STD_CVT; if (reduced_blanking) t.bt.flags |= V4L2_DV_FL_REDUCED_BLANKING; if (!v4l2_valid_dv_timings(&t, cap, NULL, NULL)) return false; *timings = t; return true; } EXPORT_SYMBOL_GPL(v4l2_detect_cvt); /* * GTF defines * Based on Generalized Timing Formula Standard * Version 1.1 September 2, 1999 */ #define GTF_PXL_CLK_GRAN 250000 /* pixel clock granularity */ #define GTF_MIN_VSYNC_BP 550 /* min time of vsync + back porch (us) */ #define GTF_V_FP 1 /* vertical front porch (lines) */ #define GTF_CELL_GRAN 8 /* character cell granularity */ /* Default */ #define GTF_D_M 600 /* blanking formula gradient */ #define GTF_D_C 40 /* blanking formula offset */ #define GTF_D_K 128 /* blanking formula scaling factor */ #define GTF_D_J 20 /* blanking formula scaling factor */ #define GTF_D_C_PRIME ((((GTF_D_C - GTF_D_J) * GTF_D_K) / 256) + GTF_D_J) #define GTF_D_M_PRIME ((GTF_D_K * GTF_D_M) / 256) /* Secondary */ #define GTF_S_M 3600 /* blanking formula gradient */ #define GTF_S_C 40 /* blanking formula offset */ #define GTF_S_K 128 /* blanking formula scaling factor */ #define GTF_S_J 35 /* blanking formula scaling factor */ #define GTF_S_C_PRIME ((((GTF_S_C - GTF_S_J) * GTF_S_K) / 256) + GTF_S_J) #define GTF_S_M_PRIME ((GTF_S_K * GTF_S_M) / 256) /** v4l2_detect_gtf - detect if the given timings follow the GTF standard * @frame_height - the total height of the frame (including blanking) in lines. * @hfreq - the horizontal frequency in Hz. * @vsync - the height of the vertical sync in lines. * @polarities - the horizontal and vertical polarities (same as struct * v4l2_bt_timings polarities). * @interlaced - if this flag is true, it indicates interlaced format * @aspect - preferred aspect ratio. GTF has no method of determining the * aspect ratio in order to derive the image width from the * image height, so it has to be passed explicitly. Usually * the native screen aspect ratio is used for this. 
If it * is not filled in correctly, then 16:9 will be assumed. * @cap - the v4l2_dv_timings_cap capabilities. * @timings - the resulting timings. * * This function will attempt to detect if the given values correspond to a * valid GTF format. If so, then it will return true, and fmt will be filled * in with the found GTF timings. */ bool v4l2_detect_gtf(unsigned int frame_height, unsigned int hfreq, unsigned int vsync, u32 polarities, bool interlaced, struct v4l2_fract aspect, const struct v4l2_dv_timings_cap *cap, struct v4l2_dv_timings *timings) { struct v4l2_dv_timings t = {}; int pix_clk; int v_fp, v_bp, h_fp, hsync; int frame_width, image_height, image_width; bool default_gtf; int h_blank; if (vsync != 3) return false; if (polarities == V4L2_DV_VSYNC_POS_POL) default_gtf = true; else if (polarities == V4L2_DV_HSYNC_POS_POL) default_gtf = false; else return false; if (hfreq == 0) return false; /* Vertical */ v_fp = GTF_V_FP; v_bp = (GTF_MIN_VSYNC_BP * hfreq + 500000) / 1000000 - vsync; if (interlaced) image_height = (frame_height - 2 * v_fp - 2 * vsync - 2 * v_bp) & ~0x1; else image_height = (frame_height - v_fp - vsync - v_bp + 1) & ~0x1; if (image_height < 0) return false; if (aspect.numerator == 0 || aspect.denominator == 0) { aspect.numerator = 16; aspect.denominator = 9; } image_width = ((image_height * aspect.numerator) / aspect.denominator); image_width = (image_width + GTF_CELL_GRAN/2) & ~(GTF_CELL_GRAN - 1); /* Horizontal */ if (default_gtf) { u64 num; u32 den; num = (((u64)image_width * GTF_D_C_PRIME * hfreq) - ((u64)image_width * GTF_D_M_PRIME * 1000)); den = (hfreq * (100 - GTF_D_C_PRIME) + GTF_D_M_PRIME * 1000) * (2 * GTF_CELL_GRAN); h_blank = div_u64((num + (den >> 1)), den); h_blank *= (2 * GTF_CELL_GRAN); } else { u64 num; u32 den; num = (((u64)image_width * GTF_S_C_PRIME * hfreq) - ((u64)image_width * GTF_S_M_PRIME * 1000)); den = (hfreq * (100 - GTF_S_C_PRIME) + GTF_S_M_PRIME * 1000) * (2 * GTF_CELL_GRAN); h_blank = div_u64((num + (den >> 1)), den); h_blank *= (2 * GTF_CELL_GRAN); } frame_width = image_width + h_blank; pix_clk = (image_width + h_blank) * hfreq; pix_clk = pix_clk / GTF_PXL_CLK_GRAN * GTF_PXL_CLK_GRAN; hsync = (frame_width * 8 + 50) / 100; hsync = DIV_ROUND_CLOSEST(hsync, GTF_CELL_GRAN) * GTF_CELL_GRAN; h_fp = h_blank / 2 - hsync; t.type = V4L2_DV_BT_656_1120; t.bt.polarities = polarities; t.bt.width = image_width; t.bt.height = image_height; t.bt.hfrontporch = h_fp; t.bt.vfrontporch = v_fp; t.bt.hsync = hsync; t.bt.vsync = vsync; t.bt.hbackporch = frame_width - image_width - h_fp - hsync; if (!interlaced) { t.bt.vbackporch = frame_height - image_height - v_fp - vsync; t.bt.interlaced = V4L2_DV_PROGRESSIVE; } else { t.bt.vbackporch = (frame_height - image_height - 2 * v_fp - 2 * vsync) / 2; t.bt.il_vbackporch = frame_height - image_height - 2 * v_fp - 2 * vsync - t.bt.vbackporch; t.bt.il_vfrontporch = v_fp; t.bt.il_vsync = vsync; t.bt.flags |= V4L2_DV_FL_HALF_LINE; t.bt.interlaced = V4L2_DV_INTERLACED; } t.bt.pixelclock = pix_clk; t.bt.standards = V4L2_DV_BT_STD_GTF; if (!default_gtf) t.bt.flags |= V4L2_DV_FL_REDUCED_BLANKING; if (!v4l2_valid_dv_timings(&t, cap, NULL, NULL)) return false; *timings = t; return true; } EXPORT_SYMBOL_GPL(v4l2_detect_gtf); /** v4l2_calc_aspect_ratio - calculate the aspect ratio based on bytes * 0x15 and 0x16 from the EDID. * @hor_landscape - byte 0x15 from the EDID. * @vert_portrait - byte 0x16 from the EDID. * * Determines the aspect ratio from the EDID. 
* See VESA Enhanced EDID standard, release A, rev 2, section 3.6.2: * "Horizontal and Vertical Screen Size or Aspect Ratio" */ struct v4l2_fract v4l2_calc_aspect_ratio(u8 hor_landscape, u8 vert_portrait) { struct v4l2_fract aspect = { 16, 9 }; u8 ratio; /* Nothing filled in, fallback to 16:9 */ if (!hor_landscape && !vert_portrait) return aspect; /* Both filled in, so they are interpreted as the screen size in cm */ if (hor_landscape && vert_portrait) { aspect.numerator = hor_landscape; aspect.denominator = vert_portrait; return aspect; } /* Only one is filled in, so interpret them as a ratio: (val + 99) / 100 */ ratio = hor_landscape | vert_portrait; /* Change some rounded values into the exact aspect ratio */ if (ratio == 79) { aspect.numerator = 16; aspect.denominator = 9; } else if (ratio == 34) { aspect.numerator = 4; aspect.denominator = 3; } else if (ratio == 68) { aspect.numerator = 15; aspect.denominator = 9; } else { aspect.numerator = hor_landscape + 99; aspect.denominator = 100; } if (hor_landscape) return aspect; /* The aspect ratio is for portrait, so swap numerator and denominator */ swap(aspect.denominator, aspect.numerator); return aspect; } EXPORT_SYMBOL_GPL(v4l2_calc_aspect_ratio); /** v4l2_hdmi_rx_colorimetry - determine HDMI colorimetry information * based on various InfoFrames. * @avi: the AVI InfoFrame * @hdmi: the HDMI Vendor InfoFrame, may be NULL * @height: the frame height * * Determines the HDMI colorimetry information, i.e. how the HDMI * pixel color data should be interpreted. * * Note that some of the newer features (DCI-P3, HDR) are not yet * implemented: the hdmi.h header needs to be updated to the HDMI 2.0 * and CTA-861-G standards. */ struct v4l2_hdmi_colorimetry v4l2_hdmi_rx_colorimetry(const struct hdmi_avi_infoframe *avi, const struct hdmi_vendor_infoframe *hdmi, unsigned int height) { struct v4l2_hdmi_colorimetry c = { V4L2_COLORSPACE_SRGB, V4L2_YCBCR_ENC_DEFAULT, V4L2_QUANTIZATION_FULL_RANGE, V4L2_XFER_FUNC_SRGB }; bool is_ce = avi->video_code || (hdmi && hdmi->vic); bool is_sdtv = height <= 576; bool default_is_lim_range_rgb = avi->video_code > 1; switch (avi->colorspace) { case HDMI_COLORSPACE_RGB: /* RGB pixel encoding */ switch (avi->colorimetry) { case HDMI_COLORIMETRY_EXTENDED: switch (avi->extended_colorimetry) { case HDMI_EXTENDED_COLORIMETRY_OPRGB: c.colorspace = V4L2_COLORSPACE_OPRGB; c.xfer_func = V4L2_XFER_FUNC_OPRGB; break; case HDMI_EXTENDED_COLORIMETRY_BT2020: c.colorspace = V4L2_COLORSPACE_BT2020; c.xfer_func = V4L2_XFER_FUNC_709; break; default: break; } break; default: break; } switch (avi->quantization_range) { case HDMI_QUANTIZATION_RANGE_LIMITED: c.quantization = V4L2_QUANTIZATION_LIM_RANGE; break; case HDMI_QUANTIZATION_RANGE_FULL: break; default: if (default_is_lim_range_rgb) c.quantization = V4L2_QUANTIZATION_LIM_RANGE; break; } break; default: /* YCbCr pixel encoding */ c.quantization = V4L2_QUANTIZATION_LIM_RANGE; switch (avi->colorimetry) { case HDMI_COLORIMETRY_NONE: if (!is_ce) break; if (is_sdtv) { c.colorspace = V4L2_COLORSPACE_SMPTE170M; c.ycbcr_enc = V4L2_YCBCR_ENC_601; } else { c.colorspace = V4L2_COLORSPACE_REC709; c.ycbcr_enc = V4L2_YCBCR_ENC_709; } c.xfer_func = V4L2_XFER_FUNC_709; break; case HDMI_COLORIMETRY_ITU_601: c.colorspace = V4L2_COLORSPACE_SMPTE170M; c.ycbcr_enc = V4L2_YCBCR_ENC_601; c.xfer_func = V4L2_XFER_FUNC_709; break; case HDMI_COLORIMETRY_ITU_709: c.colorspace = V4L2_COLORSPACE_REC709; c.ycbcr_enc = V4L2_YCBCR_ENC_709; c.xfer_func = V4L2_XFER_FUNC_709; break; case HDMI_COLORIMETRY_EXTENDED: switch 
(avi->extended_colorimetry) { case HDMI_EXTENDED_COLORIMETRY_XV_YCC_601: c.colorspace = V4L2_COLORSPACE_REC709; c.ycbcr_enc = V4L2_YCBCR_ENC_XV709; c.xfer_func = V4L2_XFER_FUNC_709; break; case HDMI_EXTENDED_COLORIMETRY_XV_YCC_709: c.colorspace = V4L2_COLORSPACE_REC709; c.ycbcr_enc = V4L2_YCBCR_ENC_XV601; c.xfer_func = V4L2_XFER_FUNC_709; break; case HDMI_EXTENDED_COLORIMETRY_S_YCC_601: c.colorspace = V4L2_COLORSPACE_SRGB; c.ycbcr_enc = V4L2_YCBCR_ENC_601; c.xfer_func = V4L2_XFER_FUNC_SRGB; break; case HDMI_EXTENDED_COLORIMETRY_OPYCC_601: c.colorspace = V4L2_COLORSPACE_OPRGB; c.ycbcr_enc = V4L2_YCBCR_ENC_601; c.xfer_func = V4L2_XFER_FUNC_OPRGB; break; case HDMI_EXTENDED_COLORIMETRY_BT2020: c.colorspace = V4L2_COLORSPACE_BT2020; c.ycbcr_enc = V4L2_YCBCR_ENC_BT2020; c.xfer_func = V4L2_XFER_FUNC_709; break; case HDMI_EXTENDED_COLORIMETRY_BT2020_CONST_LUM: c.colorspace = V4L2_COLORSPACE_BT2020; c.ycbcr_enc = V4L2_YCBCR_ENC_BT2020_CONST_LUM; c.xfer_func = V4L2_XFER_FUNC_709; break; default: /* fall back to ITU_709 */ c.colorspace = V4L2_COLORSPACE_REC709; c.ycbcr_enc = V4L2_YCBCR_ENC_709; c.xfer_func = V4L2_XFER_FUNC_709; break; } break; default: break; } /* * YCC Quantization Range signaling is more-or-less broken, * let's just ignore this. */ break; } return c; } EXPORT_SYMBOL_GPL(v4l2_hdmi_rx_colorimetry); /** * v4l2_num_edid_blocks() - return the number of EDID blocks * * @edid: pointer to the EDID data * @max_blocks: maximum number of supported EDID blocks * * Return: the number of EDID blocks based on the contents of the EDID. * This supports the HDMI Forum EDID Extension Override Data Block. */ unsigned int v4l2_num_edid_blocks(const u8 *edid, unsigned int max_blocks) { unsigned int blocks; if (!edid || !max_blocks) return 0; // The number of extension blocks is recorded at byte 126 of the // first 128-byte block in the EDID. // // If there is an HDMI Forum EDID Extension Override Data Block // present, then it is in bytes 4-6 of the first CTA-861 extension // block of the EDID. blocks = edid[126] + 1; // Check for HDMI Forum EDID Extension Override Data Block if (blocks >= 2 && // The EDID must be at least 2 blocks max_blocks >= 3 && // The caller supports at least 3 blocks edid[128] == 2 && // The first extension block is type CTA-861 edid[133] == 0x78 && // Identifier for the EEODB (edid[132] & 0xe0) == 0xe0 && // Tag Code == 7 (edid[132] & 0x1f) >= 2 && // Length >= 2 edid[134] > 1) // Number of extension blocks is sane blocks = edid[134] + 1; return blocks > max_blocks ? max_blocks : blocks; } EXPORT_SYMBOL_GPL(v4l2_num_edid_blocks); /** * v4l2_get_edid_phys_addr() - find and return the physical address * * @edid: pointer to the EDID data * @size: size in bytes of the EDID data * @offset: If not %NULL then the location of the physical address * bytes in the EDID will be returned here. This is set to 0 * if there is no physical address found. * * Return: the physical address or CEC_PHYS_ADDR_INVALID if there is none. 
*/ u16 v4l2_get_edid_phys_addr(const u8 *edid, unsigned int size, unsigned int *offset) { unsigned int loc = cec_get_edid_spa_location(edid, size); if (offset) *offset = loc; if (loc == 0) return CEC_PHYS_ADDR_INVALID; return (edid[loc] << 8) | edid[loc + 1]; } EXPORT_SYMBOL_GPL(v4l2_get_edid_phys_addr); /** * v4l2_set_edid_phys_addr() - find and set the physical address * * @edid: pointer to the EDID data * @size: size in bytes of the EDID data * @phys_addr: the new physical address * * This function finds the location of the physical address in the EDID * and fills in the given physical address and updates the checksum * at the end of the EDID block. It does nothing if the EDID doesn't * contain a physical address. */ void v4l2_set_edid_phys_addr(u8 *edid, unsigned int size, u16 phys_addr) { unsigned int loc = cec_get_edid_spa_location(edid, size); u8 sum = 0; unsigned int i; if (loc == 0) return; edid[loc] = phys_addr >> 8; edid[loc + 1] = phys_addr & 0xff; loc &= ~0x7f; /* update the checksum */ for (i = loc; i < loc + 127; i++) sum += edid[i]; edid[i] = 256 - sum; } EXPORT_SYMBOL_GPL(v4l2_set_edid_phys_addr); /** * v4l2_phys_addr_for_input() - calculate the PA for an input * * @phys_addr: the physical address of the parent * @input: the number of the input port, must be between 1 and 15 * * This function calculates a new physical address based on the input * port number. For example: * * PA = 0.0.0.0 and input = 2 becomes 2.0.0.0 * * PA = 3.0.0.0 and input = 1 becomes 3.1.0.0 * * PA = 3.2.1.0 and input = 5 becomes 3.2.1.5 * * PA = 3.2.1.3 and input = 5 becomes f.f.f.f since it maxed out the depth. * * Return: the new physical address or CEC_PHYS_ADDR_INVALID. */ u16 v4l2_phys_addr_for_input(u16 phys_addr, u8 input) { /* Check if input is sane */ if (WARN_ON(input == 0 || input > 0xf)) return CEC_PHYS_ADDR_INVALID; if (phys_addr == 0) return input << 12; if ((phys_addr & 0x0fff) == 0) return phys_addr | (input << 8); if ((phys_addr & 0x00ff) == 0) return phys_addr | (input << 4); if ((phys_addr & 0x000f) == 0) return phys_addr | input; /* * All nibbles are used so no valid physical addresses can be assigned * to the input. */ return CEC_PHYS_ADDR_INVALID; } EXPORT_SYMBOL_GPL(v4l2_phys_addr_for_input); /** * v4l2_phys_addr_validate() - validate a physical address from an EDID * * @phys_addr: the physical address to validate * @parent: if not %NULL, then this is filled with the parents PA. * @port: if not %NULL, then this is filled with the input port. * * This validates a physical address as read from an EDID. If the * PA is invalid (such as 1.0.1.0 since '0' is only allowed at the end), * then it will return -EINVAL. * * The parent PA is passed into %parent and the input port is passed into * %port. For example: * * PA = 0.0.0.0: has parent 0.0.0.0 and input port 0. * * PA = 1.0.0.0: has parent 0.0.0.0 and input port 1. * * PA = 3.2.0.0: has parent 3.0.0.0 and input port 2. * * PA = f.f.f.f: has parent f.f.f.f and input port 0. * * Return: 0 if the PA is valid, -EINVAL if not. 
*/ int v4l2_phys_addr_validate(u16 phys_addr, u16 *parent, u16 *port) { int i; if (parent) *parent = phys_addr; if (port) *port = 0; if (phys_addr == CEC_PHYS_ADDR_INVALID) return 0; for (i = 0; i < 16; i += 4) if (phys_addr & (0xf << i)) break; if (i == 16) return 0; if (parent) *parent = phys_addr & (0xfff0 << i); if (port) *port = (phys_addr >> i) & 0xf; for (i += 4; i < 16; i += 4) if ((phys_addr & (0xf << i)) == 0) return -EINVAL; return 0; } EXPORT_SYMBOL_GPL(v4l2_phys_addr_validate); #ifdef CONFIG_DEBUG_FS #define DEBUGFS_FOPS(type, flag) \ static ssize_t \ infoframe_read_##type(struct file *filp, \ char __user *ubuf, size_t count, loff_t *ppos) \ { \ struct v4l2_debugfs_if *infoframes = filp->private_data; \ \ return infoframes->if_read((flag), infoframes->priv, filp, \ ubuf, count, ppos); \ } \ \ static const struct file_operations infoframe_##type##_fops = { \ .owner = THIS_MODULE, \ .open = simple_open, \ .read = infoframe_read_##type, \ } DEBUGFS_FOPS(avi, V4L2_DEBUGFS_IF_AVI); DEBUGFS_FOPS(audio, V4L2_DEBUGFS_IF_AUDIO); DEBUGFS_FOPS(spd, V4L2_DEBUGFS_IF_SPD); DEBUGFS_FOPS(hdmi, V4L2_DEBUGFS_IF_HDMI); struct v4l2_debugfs_if *v4l2_debugfs_if_alloc(struct dentry *root, u32 if_types, void *priv, v4l2_debugfs_if_read_t if_read) { struct v4l2_debugfs_if *infoframes; if (IS_ERR_OR_NULL(root) || !if_types || !if_read) return NULL; infoframes = kzalloc(sizeof(*infoframes), GFP_KERNEL); if (!infoframes) return NULL; infoframes->if_dir = debugfs_create_dir("infoframes", root); infoframes->priv = priv; infoframes->if_read = if_read; if (if_types & V4L2_DEBUGFS_IF_AVI) debugfs_create_file("avi", 0400, infoframes->if_dir, infoframes, &infoframe_avi_fops); if (if_types & V4L2_DEBUGFS_IF_AUDIO) debugfs_create_file("audio", 0400, infoframes->if_dir, infoframes, &infoframe_audio_fops); if (if_types & V4L2_DEBUGFS_IF_SPD) debugfs_create_file("spd", 0400, infoframes->if_dir, infoframes, &infoframe_spd_fops); if (if_types & V4L2_DEBUGFS_IF_HDMI) debugfs_create_file("hdmi", 0400, infoframes->if_dir, infoframes, &infoframe_hdmi_fops); return infoframes; } EXPORT_SYMBOL_GPL(v4l2_debugfs_if_alloc); void v4l2_debugfs_if_free(struct v4l2_debugfs_if *infoframes) { if (infoframes) { debugfs_remove_recursive(infoframes->if_dir); kfree(infoframes); } } EXPORT_SYMBOL_GPL(v4l2_debugfs_if_free); #endif
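/*
 * Standalone sketch (assumption: compiled outside the kernel, so it does not
 * include media/v4l2-dv-timings.h) mirroring the nibble arithmetic documented
 * for v4l2_phys_addr_for_input() above: the input port number is written into
 * the first unused (zero) nibble of the parent CEC physical address, and
 * f.f.f.f is returned once all four nibbles are occupied.
 */
#include <stdint.h>
#include <stdio.h>

#define MODEL_PHYS_ADDR_INVALID 0xffff

static uint16_t model_phys_addr_for_input(uint16_t phys_addr, uint8_t input)
{
	if (input == 0 || input > 0xf)
		return MODEL_PHYS_ADDR_INVALID;
	if (phys_addr == 0)
		return input << 12;
	if ((phys_addr & 0x0fff) == 0)
		return phys_addr | (input << 8);
	if ((phys_addr & 0x00ff) == 0)
		return phys_addr | (input << 4);
	if ((phys_addr & 0x000f) == 0)
		return phys_addr | input;
	return MODEL_PHYS_ADDR_INVALID;	/* no free nibble left */
}

int main(void)
{
	/* the examples from the kernel-doc above */
	printf("%04x\n", model_phys_addr_for_input(0x3210, 5));	/* 3.2.1.5 */
	printf("%04x\n", model_phys_addr_for_input(0x3213, 5));	/* f.f.f.f */
	return 0;
}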
// SPDX-License-Identifier: GPL-2.0 /* * queue_stack_maps.c: BPF queue and stack maps * * Copyright (c) 2018 Politecnico di Torino */ #include <linux/bpf.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/btf_ids.h> #include "percpu_freelist.h" #include <asm/rqspinlock.h> #define QUEUE_STACK_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) struct bpf_queue_stack { struct bpf_map map; rqspinlock_t lock; u32 head, tail; u32 size; /* max_entries + 1 */ char elements[] __aligned(8); }; static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map) { return container_of(map, struct bpf_queue_stack, map); } static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs) { return qs->head == qs->tail; } static bool queue_stack_map_is_full(struct bpf_queue_stack *qs) { u32 head = qs->head + 1; if (unlikely(head >= qs->size)) head = 0; return head == qs->tail; } /* Called from syscall */ static int queue_stack_map_alloc_check(union bpf_attr *attr) { /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 0 || attr->value_size == 0 || attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK || !bpf_map_flags_access_ok(attr->map_flags)) return -EINVAL; if (attr->value_size > KMALLOC_MAX_SIZE) /* if value_size is bigger, the user space won't be able to * access the elements.
*/ return -E2BIG; return 0; } static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) { int numa_node = bpf_map_attr_numa_node(attr); struct bpf_queue_stack *qs; u64 size, queue_size; size = (u64) attr->max_entries + 1; queue_size = sizeof(*qs) + size * attr->value_size; qs = bpf_map_area_alloc(queue_size, numa_node); if (!qs) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&qs->map, attr); qs->size = size; raw_res_spin_lock_init(&qs->lock); return &qs->map; } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void queue_stack_map_free(struct bpf_map *map) { struct bpf_queue_stack *qs = bpf_queue_stack(map); bpf_map_area_free(qs); } static long __queue_map_get(struct bpf_map *map, void *value, bool delete) { struct bpf_queue_stack *qs = bpf_queue_stack(map); unsigned long flags; int err = 0; void *ptr; if (raw_res_spin_lock_irqsave(&qs->lock, flags)) return -EBUSY; if (queue_stack_map_is_empty(qs)) { memset(value, 0, qs->map.value_size); err = -ENOENT; goto out; } ptr = &qs->elements[qs->tail * qs->map.value_size]; memcpy(value, ptr, qs->map.value_size); if (delete) { if (unlikely(++qs->tail >= qs->size)) qs->tail = 0; } out: raw_res_spin_unlock_irqrestore(&qs->lock, flags); return err; } static long __stack_map_get(struct bpf_map *map, void *value, bool delete) { struct bpf_queue_stack *qs = bpf_queue_stack(map); unsigned long flags; int err = 0; void *ptr; u32 index; if (raw_res_spin_lock_irqsave(&qs->lock, flags)) return -EBUSY; if (queue_stack_map_is_empty(qs)) { memset(value, 0, qs->map.value_size); err = -ENOENT; goto out; } index = qs->head - 1; if (unlikely(index >= qs->size)) index = qs->size - 1; ptr = &qs->elements[index * qs->map.value_size]; memcpy(value, ptr, qs->map.value_size); if (delete) qs->head = index; out: raw_res_spin_unlock_irqrestore(&qs->lock, flags); return err; } /* Called from syscall or from eBPF program */ static long queue_map_peek_elem(struct bpf_map *map, void *value) { return __queue_map_get(map, value, false); } /* Called from syscall or from eBPF program */ static long stack_map_peek_elem(struct bpf_map *map, void *value) { return __stack_map_get(map, value, false); } /* Called from syscall or from eBPF program */ static long queue_map_pop_elem(struct bpf_map *map, void *value) { return __queue_map_get(map, value, true); } /* Called from syscall or from eBPF program */ static long stack_map_pop_elem(struct bpf_map *map, void *value) { return __stack_map_get(map, value, true); } /* Called from syscall or from eBPF program */ static long queue_stack_map_push_elem(struct bpf_map *map, void *value, u64 flags) { struct bpf_queue_stack *qs = bpf_queue_stack(map); unsigned long irq_flags; int err = 0; void *dst; /* BPF_EXIST is used to force making room for a new element in case the * map is full */ bool replace = (flags & BPF_EXIST); /* Check supported flags for queue and stack maps */ if (flags & BPF_NOEXIST || flags > BPF_EXIST) return -EINVAL; if (raw_res_spin_lock_irqsave(&qs->lock, irq_flags)) return -EBUSY; if (queue_stack_map_is_full(qs)) { if (!replace) { err = -E2BIG; goto out; } /* advance tail pointer to overwrite oldest element */ if (unlikely(++qs->tail >= qs->size)) qs->tail = 0; } dst = &qs->elements[qs->head * qs->map.value_size]; memcpy(dst, value, qs->map.value_size); if (unlikely(++qs->head >= qs->size)) qs->head = 0; out: raw_res_spin_unlock_irqrestore(&qs->lock, irq_flags); return err; } /* Called from syscall or from eBPF program */ static void *queue_stack_map_lookup_elem(struct bpf_map 
*map, void *key) { return NULL; } /* Called from syscall or from eBPF program */ static long queue_stack_map_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { return -EINVAL; } /* Called from syscall or from eBPF program */ static long queue_stack_map_delete_elem(struct bpf_map *map, void *key) { return -EINVAL; } /* Called from syscall */ static int queue_stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { return -EINVAL; } static u64 queue_stack_map_mem_usage(const struct bpf_map *map) { u64 usage = sizeof(struct bpf_queue_stack); usage += ((u64)map->max_entries + 1) * map->value_size; return usage; } BTF_ID_LIST_SINGLE(queue_map_btf_ids, struct, bpf_queue_stack) const struct bpf_map_ops queue_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = queue_stack_map_alloc_check, .map_alloc = queue_stack_map_alloc, .map_free = queue_stack_map_free, .map_lookup_elem = queue_stack_map_lookup_elem, .map_update_elem = queue_stack_map_update_elem, .map_delete_elem = queue_stack_map_delete_elem, .map_push_elem = queue_stack_map_push_elem, .map_pop_elem = queue_map_pop_elem, .map_peek_elem = queue_map_peek_elem, .map_get_next_key = queue_stack_map_get_next_key, .map_mem_usage = queue_stack_map_mem_usage, .map_btf_id = &queue_map_btf_ids[0], }; const struct bpf_map_ops stack_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = queue_stack_map_alloc_check, .map_alloc = queue_stack_map_alloc, .map_free = queue_stack_map_free, .map_lookup_elem = queue_stack_map_lookup_elem, .map_update_elem = queue_stack_map_update_elem, .map_delete_elem = queue_stack_map_delete_elem, .map_push_elem = queue_stack_map_push_elem, .map_pop_elem = stack_map_pop_elem, .map_peek_elem = stack_map_peek_elem, .map_get_next_key = queue_stack_map_get_next_key, .map_mem_usage = queue_stack_map_mem_usage, .map_btf_id = &queue_map_btf_ids[0], };
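/*
 * Hedged userspace sketch of how the BPF_MAP_TYPE_QUEUE implemented above is
 * typically driven through libbpf (assumes libbpf >= 0.7 and sufficient
 * privileges; the map name and sizes are arbitrary).  Queue/stack maps have
 * no keys, so key_size is 0 and NULL is passed as the key: update pushes,
 * lookup_and_delete pops, and pushing with BPF_EXIST overwrites the oldest
 * element when the map is full, matching queue_stack_map_push_elem() above.
 */
#include <stdio.h>
#include <bpf/bpf.h>

int main(void)
{
	__u32 val = 42, out = 0;
	int fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, "demo_queue",
				0 /* key_size */, sizeof(__u32),
				8 /* max_entries */, NULL);

	if (fd < 0)
		return 1;

	bpf_map_update_elem(fd, NULL, &val, BPF_ANY);		/* push */
	bpf_map_lookup_and_delete_elem(fd, NULL, &out);		/* pop */
	printf("popped %u\n", out);
	return 0;
}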
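/*
 * Companion sketch of the BPF-program side (assumption: built with clang and
 * libbpf's bpf_helpers.h; the section and map names are arbitrary).  The
 * bpf_map_push_elem()/bpf_map_pop_elem()/bpf_map_peek_elem() helpers map onto
 * the .map_push_elem/.map_pop_elem/.map_peek_elem ops registered above.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_STACK);
	__uint(max_entries, 16);
	__uint(key_size, 0);
	__uint(value_size, sizeof(__u32));
} pkt_len_stack SEC(".maps");

SEC("xdp")
int record_pkt_len(struct xdp_md *ctx)
{
	__u32 len = ctx->data_end - ctx->data;

	/* BPF_EXIST evicts the oldest entry instead of failing when full */
	bpf_map_push_elem(&pkt_len_stack, &len, BPF_EXIST);
	return XDP_PASS;
}

char LICENSE[] SEC("license") = "GPL";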
// SPDX-License-Identifier: GPL-2.0+ /* * Driver for Realtek RTS51xx USB card reader * * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved. * * Author: * wwang (wei_wang@realsil.com.cn) * No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China */ #include <linux/module.h> #include <linux/blkdev.h> #include <linux/kthread.h> #include <linux/sched.h> #include <linux/kernel.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> #include <linux/cdrom.h> #include <linux/usb.h> #include <linux/slab.h> #include <linux/usb_usual.h> #include "usb.h" #include "transport.h" #include "protocol.h" #include "debug.h" #include "scsiglue.h" #define DRV_NAME "ums-realtek" MODULE_DESCRIPTION("Driver for Realtek USB Card Reader"); MODULE_AUTHOR("wwang <wei_wang@realsil.com.cn>"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("USB_STORAGE"); static int auto_delink_en = 1; module_param(auto_delink_en, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(auto_delink_en, "auto delink mode (0=firmware, 1=software [default])"); #ifdef CONFIG_REALTEK_AUTOPM static int ss_en = 1; module_param(ss_en, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ss_en, "enable selective suspend"); static int ss_delay = 50; module_param(ss_delay, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ss_delay, "seconds to delay before entering selective suspend"); enum RTS51X_STAT { RTS51X_STAT_INIT, RTS51X_STAT_IDLE, RTS51X_STAT_RUN, RTS51X_STAT_SS }; #define POLLING_INTERVAL 50 #define rts51x_set_stat(chip, stat) \ ((chip)->state = (enum RTS51X_STAT)(stat)) #define rts51x_get_stat(chip) ((chip)->state) #define SET_LUN_READY(chip, lun) ((chip)->lun_ready |= ((u8)1 << (lun))) #define CLR_LUN_READY(chip, lun) ((chip)->lun_ready &= ~((u8)1 << (lun))) #define TST_LUN_READY(chip, lun) ((chip)->lun_ready & ((u8)1 << (lun))) #endif struct rts51x_status { u16 vid; u16 pid; u8 cur_lun; u8 card_type; u8 total_lun; u16 fw_ver; u8 phy_exist; u8 multi_flag; u8 multi_card; u8 log_exist; union { u8 detailed_type1; u8 detailed_type2; } detailed_type; u8 function[2]; }; struct rts51x_chip { u16 vendor_id; u16 product_id; char max_lun; struct rts51x_status *status; int status_len; u32 flag; struct us_data *us; #ifdef CONFIG_REALTEK_AUTOPM struct timer_list rts51x_suspend_timer; unsigned long timer_expires; int pwr_state; u8 lun_ready; enum RTS51X_STAT state; int support_auto_delink; #endif /* used to back up the protocol chosen in probe1 phase */ proto_cmnd proto_handler_backup; }; /* flag definition */ #define FLIDX_AUTO_DELINK 0x01 #define SCSI_LUN(srb) ((srb)->device->lun) /* Bit Operation */ #define SET_BIT(data, idx) ((data) |= 1 << (idx)) #define CLR_BIT(data, idx) ((data) &= ~(1 << (idx))) #define CHK_BIT(data, idx) ((data) & (1 << (idx))) #define SET_AUTO_DELINK(chip) ((chip)->flag |= FLIDX_AUTO_DELINK) #define
CLR_AUTO_DELINK(chip) ((chip)->flag &= ~FLIDX_AUTO_DELINK) #define CHK_AUTO_DELINK(chip) ((chip)->flag & FLIDX_AUTO_DELINK) #define RTS51X_GET_VID(chip) ((chip)->vendor_id) #define RTS51X_GET_PID(chip) ((chip)->product_id) #define VENDOR_ID(chip) ((chip)->status[0].vid) #define PRODUCT_ID(chip) ((chip)->status[0].pid) #define FW_VERSION(chip) ((chip)->status[0].fw_ver) #define STATUS_LEN(chip) ((chip)->status_len) #define STATUS_SUCCESS 0 #define STATUS_FAIL 1 /* Check card reader function */ #define SUPPORT_DETAILED_TYPE1(chip) \ CHK_BIT((chip)->status[0].function[0], 1) #define SUPPORT_OT(chip) \ CHK_BIT((chip)->status[0].function[0], 2) #define SUPPORT_OC(chip) \ CHK_BIT((chip)->status[0].function[0], 3) #define SUPPORT_AUTO_DELINK(chip) \ CHK_BIT((chip)->status[0].function[0], 4) #define SUPPORT_SDIO(chip) \ CHK_BIT((chip)->status[0].function[1], 0) #define SUPPORT_DETAILED_TYPE2(chip) \ CHK_BIT((chip)->status[0].function[1], 1) #define CHECK_PID(chip, pid) (RTS51X_GET_PID(chip) == (pid)) #define CHECK_FW_VER(chip, fw_ver) (FW_VERSION(chip) == (fw_ver)) #define CHECK_ID(chip, pid, fw_ver) \ (CHECK_PID((chip), (pid)) && CHECK_FW_VER((chip), (fw_ver))) static int init_realtek_cr(struct us_data *us); /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ {\ USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags) \ } static const struct usb_device_id realtek_cr_ids[] = { # include "unusual_realtek.h" {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, realtek_cr_ids); #undef UNUSUAL_DEV /* * The flags table */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } static const struct us_unusual_dev realtek_cr_unusual_dev_list[] = { # include "unusual_realtek.h" {} /* Terminating entry */ }; #undef UNUSUAL_DEV static int rts51x_bulk_transport(struct us_data *us, u8 lun, u8 *cmd, int cmd_len, u8 *buf, int buf_len, enum dma_data_direction dir, int *act_len) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *)us->iobuf; struct bulk_cs_wrap *bcs = (struct bulk_cs_wrap *)us->iobuf; int result; unsigned int residue; unsigned int cswlen; unsigned int cbwlen = US_BULK_CB_WRAP_LEN; /* set up the command wrapper */ bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN); bcb->DataTransferLength = cpu_to_le32(buf_len); bcb->Flags = (dir == DMA_FROM_DEVICE) ? US_BULK_FLAG_IN : US_BULK_FLAG_OUT; bcb->Tag = ++us->tag; bcb->Lun = lun; bcb->Length = cmd_len; /* copy the command payload */ memset(bcb->CDB, 0, sizeof(bcb->CDB)); memcpy(bcb->CDB, cmd, bcb->Length); /* send it to out endpoint */ result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, bcb, cbwlen, NULL); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* DATA STAGE */ /* send/receive data payload, if there is any */ if (buf && buf_len) { unsigned int pipe = (dir == DMA_FROM_DEVICE) ? 
us->recv_bulk_pipe : us->send_bulk_pipe; result = usb_stor_bulk_transfer_buf(us, pipe, buf, buf_len, NULL); if (result == USB_STOR_XFER_ERROR) return USB_STOR_TRANSPORT_ERROR; } /* get CSW for device status */ result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, bcs, US_BULK_CS_WRAP_LEN, &cswlen); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* check bulk status */ if (bcs->Signature != cpu_to_le32(US_BULK_CS_SIGN)) { usb_stor_dbg(us, "Signature mismatch: got %08X, expecting %08X\n", le32_to_cpu(bcs->Signature), US_BULK_CS_SIGN); return USB_STOR_TRANSPORT_ERROR; } residue = bcs->Residue; if (bcs->Tag != us->tag) return USB_STOR_TRANSPORT_ERROR; /* * try to compute the actual residue, based on how much data * was really transferred and what the device tells us */ if (residue) residue = residue < buf_len ? residue : buf_len; if (act_len) *act_len = buf_len - residue; /* based on the status code, we report good or bad */ switch (bcs->Status) { case US_BULK_STAT_OK: /* command good -- note that data could be short */ return USB_STOR_TRANSPORT_GOOD; case US_BULK_STAT_FAIL: /* command failed */ return USB_STOR_TRANSPORT_FAILED; case US_BULK_STAT_PHASE: /* * phase error -- note that a transport reset will be * invoked by the invoke_transport() function */ return USB_STOR_TRANSPORT_ERROR; } /* we should never get here, but if we do, we're in trouble */ return USB_STOR_TRANSPORT_ERROR; } static int rts51x_bulk_transport_special(struct us_data *us, u8 lun, u8 *cmd, int cmd_len, u8 *buf, int buf_len, enum dma_data_direction dir, int *act_len) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; struct bulk_cs_wrap *bcs = (struct bulk_cs_wrap *) us->iobuf; int result; unsigned int cswlen; unsigned int cbwlen = US_BULK_CB_WRAP_LEN; /* set up the command wrapper */ bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN); bcb->DataTransferLength = cpu_to_le32(buf_len); bcb->Flags = (dir == DMA_FROM_DEVICE) ? US_BULK_FLAG_IN : US_BULK_FLAG_OUT; bcb->Tag = ++us->tag; bcb->Lun = lun; bcb->Length = cmd_len; /* copy the command payload */ memset(bcb->CDB, 0, sizeof(bcb->CDB)); memcpy(bcb->CDB, cmd, bcb->Length); /* send it to out endpoint */ result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, bcb, cbwlen, NULL); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* DATA STAGE */ /* send/receive data payload, if there is any */ if (buf && buf_len) { unsigned int pipe = (dir == DMA_FROM_DEVICE) ? 
us->recv_bulk_pipe : us->send_bulk_pipe; result = usb_stor_bulk_transfer_buf(us, pipe, buf, buf_len, NULL); if (result == USB_STOR_XFER_ERROR) return USB_STOR_TRANSPORT_ERROR; } /* get CSW for device status */ result = usb_bulk_msg(us->pusb_dev, us->recv_bulk_pipe, bcs, US_BULK_CS_WRAP_LEN, &cswlen, 250); return result; } /* Determine what the maximum LUN supported is */ static int rts51x_get_max_lun(struct us_data *us) { int result; /* issue the command */ us->iobuf[0] = 0; result = usb_stor_control_msg(us, us->recv_ctrl_pipe, US_BULK_GET_MAX_LUN, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, us->ifnum, us->iobuf, 1, 10 * HZ); usb_stor_dbg(us, "GetMaxLUN command result is %d, data is %d\n", result, us->iobuf[0]); /* if we have a successful request, return the result */ if (result > 0) return us->iobuf[0]; return 0; } static int rts51x_read_mem(struct us_data *us, u16 addr, u8 *data, u16 len) { int retval; u8 cmnd[12] = { 0 }; u8 *buf; buf = kmalloc(len, GFP_NOIO); if (buf == NULL) return -ENOMEM; usb_stor_dbg(us, "addr = 0x%x, len = %d\n", addr, len); cmnd[0] = 0xF0; cmnd[1] = 0x0D; cmnd[2] = (u8) (addr >> 8); cmnd[3] = (u8) addr; cmnd[4] = (u8) (len >> 8); cmnd[5] = (u8) len; retval = rts51x_bulk_transport(us, 0, cmnd, 12, buf, len, DMA_FROM_DEVICE, NULL); if (retval != USB_STOR_TRANSPORT_GOOD) { kfree(buf); return -EIO; } memcpy(data, buf, len); kfree(buf); return 0; } static int rts51x_write_mem(struct us_data *us, u16 addr, u8 *data, u16 len) { int retval; u8 cmnd[12] = { 0 }; u8 *buf; buf = kmemdup(data, len, GFP_NOIO); if (buf == NULL) return USB_STOR_TRANSPORT_ERROR; usb_stor_dbg(us, "addr = 0x%x, len = %d\n", addr, len); cmnd[0] = 0xF0; cmnd[1] = 0x0E; cmnd[2] = (u8) (addr >> 8); cmnd[3] = (u8) addr; cmnd[4] = (u8) (len >> 8); cmnd[5] = (u8) len; retval = rts51x_bulk_transport(us, 0, cmnd, 12, buf, len, DMA_TO_DEVICE, NULL); kfree(buf); if (retval != USB_STOR_TRANSPORT_GOOD) return -EIO; return 0; } static int rts51x_read_status(struct us_data *us, u8 lun, u8 *status, int len, int *actlen) { int retval; u8 cmnd[12] = { 0 }; u8 *buf; buf = kmalloc(len, GFP_NOIO); if (buf == NULL) return USB_STOR_TRANSPORT_ERROR; usb_stor_dbg(us, "lun = %d\n", lun); cmnd[0] = 0xF0; cmnd[1] = 0x09; retval = rts51x_bulk_transport(us, lun, cmnd, 12, buf, len, DMA_FROM_DEVICE, actlen); if (retval != USB_STOR_TRANSPORT_GOOD) { kfree(buf); return -EIO; } memcpy(status, buf, len); kfree(buf); return 0; } static int rts51x_check_status(struct us_data *us, u8 lun) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 buf[16]; retval = rts51x_read_status(us, lun, buf, 16, &(chip->status_len)); if (retval != STATUS_SUCCESS) return -EIO; usb_stor_dbg(us, "chip->status_len = %d\n", chip->status_len); chip->status[lun].vid = ((u16) buf[0] << 8) | buf[1]; chip->status[lun].pid = ((u16) buf[2] << 8) | buf[3]; chip->status[lun].cur_lun = buf[4]; chip->status[lun].card_type = buf[5]; chip->status[lun].total_lun = buf[6]; chip->status[lun].fw_ver = ((u16) buf[7] << 8) | buf[8]; chip->status[lun].phy_exist = buf[9]; chip->status[lun].multi_flag = buf[10]; chip->status[lun].multi_card = buf[11]; chip->status[lun].log_exist = buf[12]; if (chip->status_len == 16) { chip->status[lun].detailed_type.detailed_type1 = buf[13]; chip->status[lun].function[0] = buf[14]; chip->status[lun].function[1] = buf[15]; } return 0; } static int enable_oscillator(struct us_data *us) { int retval; u8 value; retval = rts51x_read_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; value |= 0x04; retval = 
rts51x_write_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; retval = rts51x_read_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; if (!(value & 0x04)) return -EIO; return 0; } static int __do_config_autodelink(struct us_data *us, u8 *data, u16 len) { int retval; u8 cmnd[12] = {0}; u8 *buf; usb_stor_dbg(us, "addr = 0xfe47, len = %d\n", len); buf = kmemdup(data, len, GFP_NOIO); if (!buf) return USB_STOR_TRANSPORT_ERROR; cmnd[0] = 0xF0; cmnd[1] = 0x0E; cmnd[2] = 0xfe; cmnd[3] = 0x47; cmnd[4] = (u8)(len >> 8); cmnd[5] = (u8)len; retval = rts51x_bulk_transport_special(us, 0, cmnd, 12, buf, len, DMA_TO_DEVICE, NULL); kfree(buf); if (retval != USB_STOR_TRANSPORT_GOOD) { return -EIO; } return 0; } static int do_config_autodelink(struct us_data *us, int enable, int force) { int retval; u8 value; retval = rts51x_read_mem(us, 0xFE47, &value, 1); if (retval < 0) return -EIO; if (enable) { if (force) value |= 0x03; else value |= 0x01; } else { value &= ~0x03; } usb_stor_dbg(us, "set 0xfe47 to 0x%x\n", value); /* retval = rts51x_write_mem(us, 0xFE47, &value, 1); */ retval = __do_config_autodelink(us, &value, 1); if (retval < 0) return -EIO; return 0; } static int config_autodelink_after_power_on(struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 value; if (!CHK_AUTO_DELINK(chip)) return 0; retval = rts51x_read_mem(us, 0xFE47, &value, 1); if (retval < 0) return -EIO; if (auto_delink_en) { CLR_BIT(value, 0); CLR_BIT(value, 1); SET_BIT(value, 2); if (CHECK_ID(chip, 0x0138, 0x3882)) CLR_BIT(value, 2); SET_BIT(value, 7); /* retval = rts51x_write_mem(us, 0xFE47, &value, 1); */ retval = __do_config_autodelink(us, &value, 1); if (retval < 0) return -EIO; retval = enable_oscillator(us); if (retval == 0) (void)do_config_autodelink(us, 1, 0); } else { /* Autodelink controlled by firmware */ SET_BIT(value, 2); if (CHECK_ID(chip, 0x0138, 0x3882)) CLR_BIT(value, 2); if (CHECK_ID(chip, 0x0159, 0x5889) || CHECK_ID(chip, 0x0138, 0x3880)) { CLR_BIT(value, 0); CLR_BIT(value, 7); } /* retval = rts51x_write_mem(us, 0xFE47, &value, 1); */ retval = __do_config_autodelink(us, &value, 1); if (retval < 0) return -EIO; if (CHECK_ID(chip, 0x0159, 0x5888)) { value = 0xFF; retval = rts51x_write_mem(us, 0xFE79, &value, 1); if (retval < 0) return -EIO; value = 0x01; retval = rts51x_write_mem(us, 0x48, &value, 1); if (retval < 0) return -EIO; } } return 0; } #ifdef CONFIG_PM static int config_autodelink_before_power_down(struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 value; if (!CHK_AUTO_DELINK(chip)) return 0; if (auto_delink_en) { retval = rts51x_read_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; SET_BIT(value, 2); retval = rts51x_write_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; if (CHECK_ID(chip, 0x0159, 0x5888)) { value = 0x01; retval = rts51x_write_mem(us, 0x48, &value, 1); if (retval < 0) return -EIO; } retval = rts51x_read_mem(us, 0xFE47, &value, 1); if (retval < 0) return -EIO; SET_BIT(value, 0); if (CHECK_ID(chip, 0x0138, 0x3882)) SET_BIT(value, 2); retval = rts51x_write_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; } else { if (CHECK_ID(chip, 0x0159, 0x5889) || CHECK_ID(chip, 0x0138, 0x3880) || CHECK_ID(chip, 0x0138, 0x3882)) { retval = rts51x_read_mem(us, 0xFE47, &value, 1); if (retval < 0) return -EIO; if (CHECK_ID(chip, 0x0159, 0x5889) || CHECK_ID(chip, 0x0138, 0x3880)) { SET_BIT(value, 0); SET_BIT(value, 7); } if (CHECK_ID(chip, 0x0138, 0x3882)) SET_BIT(value, 2); /* 
retval = rts51x_write_mem(us, 0xFE47, &value, 1); */ retval = __do_config_autodelink(us, &value, 1); if (retval < 0) return -EIO; } if (CHECK_ID(chip, 0x0159, 0x5888)) { value = 0x01; retval = rts51x_write_mem(us, 0x48, &value, 1); if (retval < 0) return -EIO; } } return 0; } static void fw5895_init(struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 val; if ((PRODUCT_ID(chip) != 0x0158) || (FW_VERSION(chip) != 0x5895)) { usb_stor_dbg(us, "Not the specified device, return immediately!\n"); } else { retval = rts51x_read_mem(us, 0xFD6F, &val, 1); if (retval == STATUS_SUCCESS && (val & 0x1F) == 0) { val = 0x1F; retval = rts51x_write_mem(us, 0xFD70, &val, 1); if (retval != STATUS_SUCCESS) usb_stor_dbg(us, "Write memory fail\n"); } else { usb_stor_dbg(us, "Read memory fail, OR (val & 0x1F) != 0\n"); } } } #endif #ifdef CONFIG_REALTEK_AUTOPM static void fw5895_set_mmc_wp(struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 buf[13]; if ((PRODUCT_ID(chip) != 0x0158) || (FW_VERSION(chip) != 0x5895)) { usb_stor_dbg(us, "Not the specified device, return immediately!\n"); } else { retval = rts51x_read_mem(us, 0xFD6F, buf, 1); if (retval == STATUS_SUCCESS && (buf[0] & 0x24) == 0x24) { /* SD Exist and SD WP */ retval = rts51x_read_mem(us, 0xD04E, buf, 1); if (retval == STATUS_SUCCESS) { buf[0] |= 0x04; retval = rts51x_write_mem(us, 0xFD70, buf, 1); if (retval != STATUS_SUCCESS) usb_stor_dbg(us, "Write memory fail\n"); } else { usb_stor_dbg(us, "Read memory fail\n"); } } else { usb_stor_dbg(us, "Read memory fail, OR (buf[0]&0x24)!=0x24\n"); } } } static void rts51x_modi_suspend_timer(struct rts51x_chip *chip) { struct us_data *us = chip->us; usb_stor_dbg(us, "state:%d\n", rts51x_get_stat(chip)); chip->timer_expires = jiffies + msecs_to_jiffies(1000*ss_delay); mod_timer(&chip->rts51x_suspend_timer, chip->timer_expires); } static void rts51x_suspend_timer_fn(struct timer_list *t) { struct rts51x_chip *chip = timer_container_of(chip, t, rts51x_suspend_timer); struct us_data *us = chip->us; switch (rts51x_get_stat(chip)) { case RTS51X_STAT_INIT: case RTS51X_STAT_RUN: rts51x_modi_suspend_timer(chip); break; case RTS51X_STAT_IDLE: case RTS51X_STAT_SS: usb_stor_dbg(us, "RTS51X_STAT_SS, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); if (atomic_read(&us->pusb_intf->dev.power.usage_count) > 0) { usb_stor_dbg(us, "Ready to enter SS state\n"); rts51x_set_stat(chip, RTS51X_STAT_SS); /* ignore mass storage interface's children */ pm_suspend_ignore_children(&us->pusb_intf->dev, true); usb_autopm_put_interface_async(us->pusb_intf); usb_stor_dbg(us, "RTS51X_STAT_SS 01, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); } break; default: usb_stor_dbg(us, "Unknown state !!!\n"); break; } } static inline int working_scsi(struct scsi_cmnd *srb) { if ((srb->cmnd[0] == TEST_UNIT_READY) || (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL)) { return 0; } return 1; } static void rts51x_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); static int card_first_show = 1; static const u8 media_not_present[] = { 0x70, 0, 0x02, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0x3A, 0, 0, 0, 0, 0 }; static const u8 invalid_cmd_field[] = { 0x70, 0, 0x05, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0x24, 0, 0, 0, 0, 0 }; int ret; if (working_scsi(srb)) { usb_stor_dbg(us, "working scsi, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); if 
(atomic_read(&us->pusb_intf->dev.power.usage_count) <= 0) { ret = usb_autopm_get_interface(us->pusb_intf); usb_stor_dbg(us, "working scsi, ret=%d\n", ret); } if (rts51x_get_stat(chip) != RTS51X_STAT_RUN) rts51x_set_stat(chip, RTS51X_STAT_RUN); chip->proto_handler_backup(srb, us); } else { if (rts51x_get_stat(chip) == RTS51X_STAT_SS) { usb_stor_dbg(us, "NOT working scsi\n"); if ((srb->cmnd[0] == TEST_UNIT_READY) && (chip->pwr_state == US_SUSPEND)) { if (TST_LUN_READY(chip, srb->device->lun)) { srb->result = SAM_STAT_GOOD; } else { srb->result = SAM_STAT_CHECK_CONDITION; memcpy(srb->sense_buffer, media_not_present, US_SENSE_SIZE); } usb_stor_dbg(us, "TEST_UNIT_READY\n"); goto out; } if (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL) { int prevent = srb->cmnd[4] & 0x1; if (prevent) { srb->result = SAM_STAT_CHECK_CONDITION; memcpy(srb->sense_buffer, invalid_cmd_field, US_SENSE_SIZE); } else { srb->result = SAM_STAT_GOOD; } usb_stor_dbg(us, "ALLOW_MEDIUM_REMOVAL\n"); goto out; } } else { usb_stor_dbg(us, "NOT working scsi, not SS\n"); chip->proto_handler_backup(srb, us); /* Check whether card is plugged in */ if (srb->cmnd[0] == TEST_UNIT_READY) { if (srb->result == SAM_STAT_GOOD) { SET_LUN_READY(chip, srb->device->lun); if (card_first_show) { card_first_show = 0; fw5895_set_mmc_wp(us); } } else { CLR_LUN_READY(chip, srb->device->lun); card_first_show = 1; } } if (rts51x_get_stat(chip) != RTS51X_STAT_IDLE) rts51x_set_stat(chip, RTS51X_STAT_IDLE); } } out: usb_stor_dbg(us, "state:%d\n", rts51x_get_stat(chip)); if (rts51x_get_stat(chip) == RTS51X_STAT_RUN) rts51x_modi_suspend_timer(chip); } static int realtek_cr_autosuspend_setup(struct us_data *us) { struct rts51x_chip *chip; struct rts51x_status *status = NULL; u8 buf[16]; int retval; chip = (struct rts51x_chip *)us->extra; chip->support_auto_delink = 0; chip->pwr_state = US_RESUME; chip->lun_ready = 0; rts51x_set_stat(chip, RTS51X_STAT_INIT); retval = rts51x_read_status(us, 0, buf, 16, &(chip->status_len)); if (retval != STATUS_SUCCESS) { usb_stor_dbg(us, "Read status fail\n"); return -EIO; } status = chip->status; status->vid = ((u16) buf[0] << 8) | buf[1]; status->pid = ((u16) buf[2] << 8) | buf[3]; status->cur_lun = buf[4]; status->card_type = buf[5]; status->total_lun = buf[6]; status->fw_ver = ((u16) buf[7] << 8) | buf[8]; status->phy_exist = buf[9]; status->multi_flag = buf[10]; status->multi_card = buf[11]; status->log_exist = buf[12]; if (chip->status_len == 16) { status->detailed_type.detailed_type1 = buf[13]; status->function[0] = buf[14]; status->function[1] = buf[15]; } /* back up the proto_handler in us->extra */ chip = (struct rts51x_chip *)(us->extra); chip->proto_handler_backup = us->proto_handler; /* Set the autosuspend_delay to 0 */ pm_runtime_set_autosuspend_delay(&us->pusb_dev->dev, 0); /* override us->proto_handler setted in get_protocol() */ us->proto_handler = rts51x_invoke_transport; chip->timer_expires = 0; timer_setup(&chip->rts51x_suspend_timer, rts51x_suspend_timer_fn, 0); fw5895_init(us); /* enable autosuspend function of the usb device */ usb_enable_autosuspend(us->pusb_dev); return 0; } #endif static void realtek_cr_destructor(void *extra) { struct rts51x_chip *chip = extra; if (!chip) return; #ifdef CONFIG_REALTEK_AUTOPM if (ss_en) { timer_delete(&chip->rts51x_suspend_timer); chip->timer_expires = 0; } #endif kfree(chip->status); } #ifdef CONFIG_PM static int realtek_cr_suspend(struct usb_interface *iface, pm_message_t message) { struct us_data *us = usb_get_intfdata(iface); /* wait until no command is running */ 
mutex_lock(&us->dev_mutex); config_autodelink_before_power_down(us); mutex_unlock(&us->dev_mutex); return 0; } static int realtek_cr_resume(struct usb_interface *iface) { struct us_data *us = usb_get_intfdata(iface); fw5895_init(us); config_autodelink_after_power_on(us); return 0; } #else #define realtek_cr_suspend NULL #define realtek_cr_resume NULL #endif static int init_realtek_cr(struct us_data *us) { struct rts51x_chip *chip; int size, i, retval; chip = kzalloc(sizeof(struct rts51x_chip), GFP_KERNEL); if (!chip) return -ENOMEM; us->extra = chip; us->extra_destructor = realtek_cr_destructor; us->max_lun = chip->max_lun = rts51x_get_max_lun(us); chip->us = us; usb_stor_dbg(us, "chip->max_lun = %d\n", chip->max_lun); size = (chip->max_lun + 1) * sizeof(struct rts51x_status); chip->status = kzalloc(size, GFP_KERNEL); if (!chip->status) goto INIT_FAIL; for (i = 0; i <= (int)(chip->max_lun); i++) { retval = rts51x_check_status(us, (u8) i); if (retval < 0) goto INIT_FAIL; } if (CHECK_PID(chip, 0x0138) || CHECK_PID(chip, 0x0158) || CHECK_PID(chip, 0x0159)) { if (CHECK_FW_VER(chip, 0x5888) || CHECK_FW_VER(chip, 0x5889) || CHECK_FW_VER(chip, 0x5901)) SET_AUTO_DELINK(chip); if (STATUS_LEN(chip) == 16) { if (SUPPORT_AUTO_DELINK(chip)) SET_AUTO_DELINK(chip); } } #ifdef CONFIG_REALTEK_AUTOPM if (ss_en) realtek_cr_autosuspend_setup(us); #endif usb_stor_dbg(us, "chip->flag = 0x%x\n", chip->flag); (void)config_autodelink_after_power_on(us); return 0; INIT_FAIL: if (us->extra) { kfree(chip->status); kfree(us->extra); us->extra = NULL; } return -EIO; } static struct scsi_host_template realtek_cr_host_template; static int realtek_cr_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct us_data *us; int result; dev_dbg(&intf->dev, "Probe Realtek Card Reader!\n"); result = usb_stor_probe1(&us, intf, id, (id - realtek_cr_ids) + realtek_cr_unusual_dev_list, &realtek_cr_host_template); if (result) return result; result = usb_stor_probe2(us); return result; } static struct usb_driver realtek_cr_driver = { .name = DRV_NAME, .probe = realtek_cr_probe, .disconnect = usb_stor_disconnect, /* .suspend = usb_stor_suspend, */ /* .resume = usb_stor_resume, */ .reset_resume = usb_stor_reset_resume, .suspend = realtek_cr_suspend, .resume = realtek_cr_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = realtek_cr_ids, .soft_unbind = 1, .supports_autosuspend = 1, .no_dynamic_id = 1, }; module_usb_stor_driver(realtek_cr_driver, realtek_cr_host_template, DRV_NAME);
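rts51x_bulk_transport() above wraps every vendor command in a standard Bulk-Only Transport CBW. The standalone sketch below illustrates what that 31-byte wrapper looks like for the 0xF0/0x0D read-memory command built by rts51x_read_mem(); the struct is declared locally for the example and assumes a little-endian host, it is not the kernel's bulk_cb_wrap.

/* Standalone illustration (assumption: standard USB Bulk-Only Transport
 * CBW layout, little-endian host). Values mirror rts51x_read_mem() above;
 * this is not kernel code. */
#include <stdio.h>
#include <stdint.h>

struct cbw {				/* 31-byte Command Block Wrapper */
	uint32_t signature;		/* 'USBC' = 0x43425355 on the wire */
	uint32_t tag;
	uint32_t data_len;		/* bytes expected in the data stage */
	uint8_t  flags;			/* bit 7 set = device-to-host */
	uint8_t  lun;
	uint8_t  cb_len;		/* valid CDB bytes */
	uint8_t  cdb[16];
} __attribute__((packed));

int main(void)
{
	uint16_t addr = 0xFE77, len = 1;	/* register read, as in enable_oscillator() */
	struct cbw c = {
		.signature = 0x43425355,
		.tag = 1,
		.data_len = len,
		.flags = 0x80,			/* IN transfer */
		.lun = 0,
		.cb_len = 12,
	};

	/* vendor "read memory" CDB, built the same way as rts51x_read_mem() */
	c.cdb[0] = 0xF0;
	c.cdb[1] = 0x0D;
	c.cdb[2] = addr >> 8;
	c.cdb[3] = addr & 0xFF;
	c.cdb[4] = len >> 8;
	c.cdb[5] = len & 0xFF;

	for (size_t i = 0; i < sizeof(c); i++)
		printf("%02x ", ((uint8_t *)&c)[i]);
	printf("\n");
	return 0;
}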
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2002 Pavel Machek <pavel@ucw.cz> * Copyright (C) 2002-2005 by David Brownell */ // #define DEBUG // error path messages, extra info // #define VERBOSE // more; success messages #include <linux/module.h> #include <linux/netdevice.h> #include <linux/ethtool.h> #include <linux/workqueue.h> #include <linux/mii.h> #include <linux/crc32.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <linux/usb/usbnet.h> /* * All known Zaurii lie about their standards conformance. At least * the earliest SA-1100 models lie by saying they support CDC Ethernet. * Some later models (especially PXA-25x and PXA-27x based ones) lie * and say they support CDC MDLM (for access to cell phone modems). * * There are non-Zaurus products that use these same protocols too. * * The annoying thing is that at the same time Sharp was developing * that annoying standards-breaking software, the Linux community had * a simple "CDC Subset" working reliably on the same SA-1100 hardware. * That is, the same functionality but not violating standards. * * The CDC Ethernet nonconformance points are troublesome to hosts * with a true CDC Ethernet implementation: * - Framing appends a CRC, which the spec says drivers "must not" do; * - Transfers data in altsetting zero, instead of altsetting 1; * - All these peripherals use the same ethernet address. * * The CDC MDLM nonconformance is less immediately troublesome, since all * MDLM implementations are quasi-proprietary anyway.
*/ static struct sk_buff * zaurus_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { int padlen; struct sk_buff *skb2; padlen = 2; if (!skb_cloned(skb)) { int tailroom = skb_tailroom(skb); if ((padlen + 4) <= tailroom) goto done; } skb2 = skb_copy_expand(skb, 0, 4 + padlen, flags); dev_kfree_skb_any(skb); skb = skb2; if (skb) { u32 fcs; done: fcs = crc32_le(~0, skb->data, skb->len); fcs = ~fcs; skb_put_u8(skb, fcs & 0xff); skb_put_u8(skb, (fcs >> 8) & 0xff); skb_put_u8(skb, (fcs >> 16) & 0xff); skb_put_u8(skb, (fcs >> 24) & 0xff); } return skb; } static int zaurus_bind(struct usbnet *dev, struct usb_interface *intf) { /* Belcarra's funky framing has other options; mostly * TRAILERS (!) with 4 bytes CRC, and maybe 2 pad bytes. */ dev->net->hard_header_len += 6; dev->rx_urb_size = dev->net->hard_header_len + dev->net->mtu; return usbnet_generic_cdc_bind(dev, intf); } /* PDA style devices are always connected if present */ static int always_connected (struct usbnet *dev) { return 0; } static const struct driver_info zaurus_sl5x00_info = { .description = "Sharp Zaurus SL-5x00", .flags = FLAG_POINTTOPOINT | FLAG_FRAMING_Z, .check_connect = always_connected, .bind = zaurus_bind, .unbind = usbnet_cdc_unbind, .tx_fixup = zaurus_tx_fixup, }; #define ZAURUS_STRONGARM_INFO ((unsigned long)&zaurus_sl5x00_info) static const struct driver_info zaurus_pxa_info = { .description = "Sharp Zaurus, PXA-2xx based", .flags = FLAG_POINTTOPOINT | FLAG_FRAMING_Z, .check_connect = always_connected, .bind = zaurus_bind, .unbind = usbnet_cdc_unbind, .tx_fixup = zaurus_tx_fixup, }; #define ZAURUS_PXA_INFO ((unsigned long)&zaurus_pxa_info) static const struct driver_info olympus_mxl_info = { .description = "Olympus R1000", .flags = FLAG_POINTTOPOINT | FLAG_FRAMING_Z, .check_connect = always_connected, .bind = zaurus_bind, .unbind = usbnet_cdc_unbind, .tx_fixup = zaurus_tx_fixup, }; #define OLYMPUS_MXL_INFO ((unsigned long)&olympus_mxl_info) /* Some more recent products using Lineo/Belcarra code will wrongly claim * CDC MDLM conformance. They aren't conformant: data endpoints live * in the control interface, there's no data interface, and it's not used * to talk to a cell phone radio. But at least we can detect these two * pseudo-classes, rather than growing this product list with entries for * each new nonconformant product (sigh). */ static const u8 safe_guid[16] = { 0x5d, 0x34, 0xcf, 0x66, 0x11, 0x18, 0x11, 0xd6, 0xa2, 0x1a, 0x00, 0x01, 0x02, 0xca, 0x9a, 0x7f, }; static const u8 blan_guid[16] = { 0x74, 0xf0, 0x3d, 0xbd, 0x1e, 0xc1, 0x44, 0x70, 0xa3, 0x67, 0x71, 0x34, 0xc9, 0xf5, 0x54, 0x37, }; static int blan_mdlm_bind(struct usbnet *dev, struct usb_interface *intf) { u8 *buf = intf->cur_altsetting->extra; int len = intf->cur_altsetting->extralen; struct usb_cdc_mdlm_desc *desc = NULL; struct usb_cdc_mdlm_detail_desc *detail = NULL; while (len > 3) { if (buf [1] != USB_DT_CS_INTERFACE) goto next_desc; /* use bDescriptorSubType, and just verify that we get a * "BLAN" (or "SAFE") descriptor. */ switch (buf [2]) { case USB_CDC_MDLM_TYPE: if (desc) { dev_dbg(&intf->dev, "extra MDLM\n"); goto bad_desc; } desc = (void *) buf; if (desc->bLength != sizeof *desc) { dev_dbg(&intf->dev, "MDLM len %u\n", desc->bLength); goto bad_desc; } /* expect bcdVersion 1.0, ignore */ if (memcmp(&desc->bGUID, blan_guid, 16) && memcmp(&desc->bGUID, safe_guid, 16)) { /* hey, this one might _really_ be MDLM! 
*/ dev_dbg(&intf->dev, "MDLM guid\n"); goto bad_desc; } break; case USB_CDC_MDLM_DETAIL_TYPE: if (detail) { dev_dbg(&intf->dev, "extra MDLM detail\n"); goto bad_desc; } detail = (void *) buf; switch (detail->bGuidDescriptorType) { case 0: /* "SAFE" */ if (detail->bLength != (sizeof *detail + 2)) goto bad_detail; break; case 1: /* "BLAN" */ if (detail->bLength != (sizeof *detail + 3)) goto bad_detail; break; default: goto bad_detail; } /* assuming we either noticed BLAN already, or will * find it soon, there are some data bytes here: * - bmNetworkCapabilities (unused) * - bmDataCapabilities (bits, see below) * - bPad (ignored, for PADAFTER -- BLAN-only) * bits are: * - 0x01 -- Zaurus framing (add CRC) * - 0x02 -- PADBEFORE (CRC includes some padding) * - 0x04 -- PADAFTER (some padding after CRC) * - 0x08 -- "fermat" packet mangling (for hw bugs) * the PADBEFORE appears not to matter; we interop * with devices that use it and those that don't. */ if ((detail->bDetailData[1] & ~0x02) != 0x01) { /* bmDataCapabilities == 0 would be fine too, * but framing is minidriver-coupled for now. */ bad_detail: dev_dbg(&intf->dev, "bad MDLM detail, %d %d %d\n", detail->bLength, detail->bDetailData[0], detail->bDetailData[2]); goto bad_desc; } /* same extra framing as for non-BLAN mode */ dev->net->hard_header_len += 6; dev->rx_urb_size = dev->net->hard_header_len + dev->net->mtu; break; } next_desc: len -= buf [0]; /* bLength */ buf += buf [0]; } if (!desc || !detail) { dev_dbg(&intf->dev, "missing cdc mdlm %s%sdescriptor\n", desc ? "" : "func ", detail ? "" : "detail "); goto bad_desc; } /* There's probably a CDC Ethernet descriptor there, but we can't * rely on the Ethernet address it provides since not all vendors * bother to make it unique. Likewise there's no point in tracking * of the CDC event notifications. */ return usbnet_get_endpoints(dev, intf); bad_desc: dev_info(&dev->udev->dev, "unsupported MDLM descriptors\n"); return -ENODEV; } static const struct driver_info bogus_mdlm_info = { .description = "pseudo-MDLM (BLAN) device", .flags = FLAG_POINTTOPOINT | FLAG_FRAMING_Z, .check_connect = always_connected, .tx_fixup = zaurus_tx_fixup, .bind = blan_mdlm_bind, }; static const struct usb_device_id products [] = { #define ZAURUS_MASTER_INTERFACE \ .bInterfaceClass = USB_CLASS_COMM, \ .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \ .bInterfaceProtocol = USB_CDC_PROTO_NONE #define ZAURUS_FAKE_INTERFACE \ .bInterfaceClass = USB_CLASS_COMM, \ .bInterfaceSubClass = USB_CDC_SUBCLASS_MDLM, \ .bInterfaceProtocol = USB_CDC_PROTO_NONE /* SA-1100 based Sharp Zaurus ("collie"), or compatible. */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8004, ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_STRONGARM_INFO, }, /* PXA-2xx based models are also lying-about-cdc. If you add any * more devices that claim to be CDC Ethernet, make sure they get * added to the blacklist in cdc_ether too. * * NOTE: OpenZaurus versions with 2.6 kernels won't use these entries, * unlike the older ones with 2.4 "embedix" kernels. 
*/ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8005, /* A-300 */ ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_PXA_INFO, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8005, /* A-300 */ ZAURUS_FAKE_INTERFACE, .driver_info = (unsigned long)&bogus_mdlm_info, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8006, /* B-500/SL-5600 */ ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_PXA_INFO, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8006, /* B-500/SL-5600 */ ZAURUS_FAKE_INTERFACE, .driver_info = (unsigned long)&bogus_mdlm_info, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8007, /* C-700 */ ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_PXA_INFO, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8007, /* C-700 */ ZAURUS_FAKE_INTERFACE, .driver_info = (unsigned long)&bogus_mdlm_info, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x9031, /* C-750 C-760 */ ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_PXA_INFO, }, { /* C-750/C-760/C-860/SL-C3000 PDA in MDLM mode */ USB_DEVICE_AND_INTERFACE_INFO(0x04DD, 0x9031, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &bogus_mdlm_info, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x9032, /* SL-6000 */ ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_PXA_INFO, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x9032, /* SL-6000 */ ZAURUS_FAKE_INTERFACE, .driver_info = (unsigned long)&bogus_mdlm_info, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, /* reported with some C860 units */ .idProduct = 0x9050, /* C-860 */ ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_PXA_INFO, }, { /* Motorola Rokr E6 */ USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x6027, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &bogus_mdlm_info, }, { /* Motorola MOTOMAGX phones */ USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x6425, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &bogus_mdlm_info, }, /* Olympus has some models with a Zaurus-compatible option. 
* R-1000 uses a FreeScale i.MXL cpu (ARMv4T) */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x07B4, .idProduct = 0x0F02, /* R-1000 */ ZAURUS_MASTER_INTERFACE, .driver_info = OLYMPUS_MXL_INFO, }, /* Logitech Harmony 900 - uses the pseudo-MDLM (BLAN) driver */ { USB_DEVICE_AND_INTERFACE_INFO(0x046d, 0xc11f, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &bogus_mdlm_info, }, { }, // END }; MODULE_DEVICE_TABLE(usb, products); static struct usb_driver zaurus_driver = { .name = "zaurus", .id_table = products, .probe = usbnet_probe, .disconnect = usbnet_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, .disable_hub_initiated_lpm = 1, }; module_usb_driver(zaurus_driver); MODULE_AUTHOR("Pavel Machek, David Brownell"); MODULE_DESCRIPTION("Sharp Zaurus PDA, and compatible products"); MODULE_LICENSE("GPL");
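zaurus_tx_fixup() above appends the frame's CRC-32 (FCS) in little-endian byte order before transmission. The userspace sketch below reproduces that trailer, under the assumption that zlib's crc32() yields the same value as the kernel's ~crc32_le(~0, ...); the frame contents are made up. Build with -lz.

/* Userspace sketch of the Zaurus framing: append the frame's CRC-32 (FCS),
 * least significant byte first, as zaurus_tx_fixup() does. */
#include <stdio.h>
#include <stdint.h>
#include <zlib.h>

static size_t zaurus_frame(uint8_t *buf, size_t len, size_t cap)
{
	uint32_t fcs;

	if (len + 4 > cap)
		return 0;			/* no tailroom */
	fcs = crc32(0L, buf, len);		/* assumed equal to ~crc32_le(~0, ...) */
	buf[len + 0] = fcs & 0xff;
	buf[len + 1] = (fcs >> 8) & 0xff;
	buf[len + 2] = (fcs >> 16) & 0xff;
	buf[len + 3] = (fcs >> 24) & 0xff;
	return len + 4;
}

int main(void)
{
	uint8_t frame[64] = "dummy ethernet payload";	/* made-up contents */
	size_t n = zaurus_frame(frame, 22, sizeof(frame));

	for (size_t i = 0; i < n; i++)
		printf("%02x", frame[i]);
	printf("\n");
	return 0;
}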
// SPDX-License-Identifier: GPL-2.0-or-later /* * MIDI byte <-> sequencer event coder * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de>, * Jaroslav Kysela <perex@perex.cz> */ #include <linux/slab.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/module.h> #include <sound/core.h> #include <sound/seq_kernel.h> #include <sound/seq_midi_event.h> #include <sound/asoundef.h> MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>, Jaroslav Kysela <perex@perex.cz>"); MODULE_DESCRIPTION("MIDI byte <-> sequencer event coder"); MODULE_LICENSE("GPL"); /* event type, index into status_event[] */ /* from 0 to 6 are normal commands (note off, on, etc.) for 0x9?-0xe?
*/ #define ST_INVALID 7 #define ST_SPECIAL 8 #define ST_SYSEX ST_SPECIAL /* from 8 to 15 are events for 0xf0-0xf7 */ /* * prototypes */ static void note_event(struct snd_midi_event *dev, struct snd_seq_event *ev); static void one_param_ctrl_event(struct snd_midi_event *dev, struct snd_seq_event *ev); static void pitchbend_ctrl_event(struct snd_midi_event *dev, struct snd_seq_event *ev); static void two_param_ctrl_event(struct snd_midi_event *dev, struct snd_seq_event *ev); static void one_param_event(struct snd_midi_event *dev, struct snd_seq_event *ev); static void songpos_event(struct snd_midi_event *dev, struct snd_seq_event *ev); static void note_decode(struct snd_seq_event *ev, unsigned char *buf); static void one_param_decode(struct snd_seq_event *ev, unsigned char *buf); static void pitchbend_decode(struct snd_seq_event *ev, unsigned char *buf); static void two_param_decode(struct snd_seq_event *ev, unsigned char *buf); static void songpos_decode(struct snd_seq_event *ev, unsigned char *buf); /* * event list */ static struct status_event_list { int event; int qlen; void (*encode)(struct snd_midi_event *dev, struct snd_seq_event *ev); void (*decode)(struct snd_seq_event *ev, unsigned char *buf); } status_event[] = { /* 0x80 - 0xef */ {SNDRV_SEQ_EVENT_NOTEOFF, 2, note_event, note_decode}, {SNDRV_SEQ_EVENT_NOTEON, 2, note_event, note_decode}, {SNDRV_SEQ_EVENT_KEYPRESS, 2, note_event, note_decode}, {SNDRV_SEQ_EVENT_CONTROLLER, 2, two_param_ctrl_event, two_param_decode}, {SNDRV_SEQ_EVENT_PGMCHANGE, 1, one_param_ctrl_event, one_param_decode}, {SNDRV_SEQ_EVENT_CHANPRESS, 1, one_param_ctrl_event, one_param_decode}, {SNDRV_SEQ_EVENT_PITCHBEND, 2, pitchbend_ctrl_event, pitchbend_decode}, /* invalid */ {SNDRV_SEQ_EVENT_NONE, -1, NULL, NULL}, /* 0xf0 - 0xff */ {SNDRV_SEQ_EVENT_SYSEX, 1, NULL, NULL}, /* sysex: 0xf0 */ {SNDRV_SEQ_EVENT_QFRAME, 1, one_param_event, one_param_decode}, /* 0xf1 */ {SNDRV_SEQ_EVENT_SONGPOS, 2, songpos_event, songpos_decode}, /* 0xf2 */ {SNDRV_SEQ_EVENT_SONGSEL, 1, one_param_event, one_param_decode}, /* 0xf3 */ {SNDRV_SEQ_EVENT_NONE, -1, NULL, NULL}, /* 0xf4 */ {SNDRV_SEQ_EVENT_NONE, -1, NULL, NULL}, /* 0xf5 */ {SNDRV_SEQ_EVENT_TUNE_REQUEST, 0, NULL, NULL}, /* 0xf6 */ {SNDRV_SEQ_EVENT_NONE, -1, NULL, NULL}, /* 0xf7 */ {SNDRV_SEQ_EVENT_CLOCK, 0, NULL, NULL}, /* 0xf8 */ {SNDRV_SEQ_EVENT_NONE, -1, NULL, NULL}, /* 0xf9 */ {SNDRV_SEQ_EVENT_START, 0, NULL, NULL}, /* 0xfa */ {SNDRV_SEQ_EVENT_CONTINUE, 0, NULL, NULL}, /* 0xfb */ {SNDRV_SEQ_EVENT_STOP, 0, NULL, NULL}, /* 0xfc */ {SNDRV_SEQ_EVENT_NONE, -1, NULL, NULL}, /* 0xfd */ {SNDRV_SEQ_EVENT_SENSING, 0, NULL, NULL}, /* 0xfe */ {SNDRV_SEQ_EVENT_RESET, 0, NULL, NULL}, /* 0xff */ }; static int extra_decode_ctrl14(struct snd_midi_event *dev, unsigned char *buf, int len, struct snd_seq_event *ev); static int extra_decode_xrpn(struct snd_midi_event *dev, unsigned char *buf, int count, struct snd_seq_event *ev); static struct extra_event_list { int event; int (*decode)(struct snd_midi_event *dev, unsigned char *buf, int len, struct snd_seq_event *ev); } extra_event[] = { {SNDRV_SEQ_EVENT_CONTROL14, extra_decode_ctrl14}, {SNDRV_SEQ_EVENT_NONREGPARAM, extra_decode_xrpn}, {SNDRV_SEQ_EVENT_REGPARAM, extra_decode_xrpn}, }; /* * new/delete record */ int snd_midi_event_new(int bufsize, struct snd_midi_event **rdev) { struct snd_midi_event *dev; *rdev = NULL; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) return -ENOMEM; if (bufsize > 0) { dev->buf = kmalloc(bufsize, GFP_KERNEL); if (dev->buf == NULL) { kfree(dev); return 
-ENOMEM; } } dev->bufsize = bufsize; dev->lastcmd = 0xff; dev->type = ST_INVALID; spin_lock_init(&dev->lock); *rdev = dev; return 0; } EXPORT_SYMBOL(snd_midi_event_new); void snd_midi_event_free(struct snd_midi_event *dev) { if (dev != NULL) { kfree(dev->buf); kfree(dev); } } EXPORT_SYMBOL(snd_midi_event_free); /* * initialize record */ static inline void reset_encode(struct snd_midi_event *dev) { dev->read = 0; dev->qlen = 0; dev->type = ST_INVALID; } void snd_midi_event_reset_encode(struct snd_midi_event *dev) { guard(spinlock_irqsave)(&dev->lock); reset_encode(dev); } EXPORT_SYMBOL(snd_midi_event_reset_encode); void snd_midi_event_reset_decode(struct snd_midi_event *dev) { guard(spinlock_irqsave)(&dev->lock); dev->lastcmd = 0xff; } EXPORT_SYMBOL(snd_midi_event_reset_decode); void snd_midi_event_no_status(struct snd_midi_event *dev, int on) { dev->nostat = on ? 1 : 0; } EXPORT_SYMBOL(snd_midi_event_no_status); /* * read one byte and encode to sequencer event: * return true if MIDI bytes are encoded to an event * false data is not finished */ bool snd_midi_event_encode_byte(struct snd_midi_event *dev, unsigned char c, struct snd_seq_event *ev) { bool rc = false; if (c >= MIDI_CMD_COMMON_CLOCK) { /* real-time event */ ev->type = status_event[ST_SPECIAL + c - 0xf0].event; ev->flags &= ~SNDRV_SEQ_EVENT_LENGTH_MASK; ev->flags |= SNDRV_SEQ_EVENT_LENGTH_FIXED; return ev->type != SNDRV_SEQ_EVENT_NONE; } guard(spinlock_irqsave)(&dev->lock); if ((c & 0x80) && (c != MIDI_CMD_COMMON_SYSEX_END || dev->type != ST_SYSEX)) { /* new command */ dev->buf[0] = c; if ((c & 0xf0) == 0xf0) /* system messages */ dev->type = (c & 0x0f) + ST_SPECIAL; else dev->type = (c >> 4) & 0x07; dev->read = 1; dev->qlen = status_event[dev->type].qlen; } else { if (dev->qlen > 0) { /* rest of command */ dev->buf[dev->read++] = c; if (dev->type != ST_SYSEX) dev->qlen--; } else { /* running status */ dev->buf[1] = c; dev->qlen = status_event[dev->type].qlen - 1; dev->read = 2; } } if (dev->qlen == 0) { ev->type = status_event[dev->type].event; ev->flags &= ~SNDRV_SEQ_EVENT_LENGTH_MASK; ev->flags |= SNDRV_SEQ_EVENT_LENGTH_FIXED; if (status_event[dev->type].encode) /* set data values */ status_event[dev->type].encode(dev, ev); if (dev->type >= ST_SPECIAL) dev->type = ST_INVALID; rc = true; } else if (dev->type == ST_SYSEX) { if (c == MIDI_CMD_COMMON_SYSEX_END || dev->read >= dev->bufsize) { ev->flags &= ~SNDRV_SEQ_EVENT_LENGTH_MASK; ev->flags |= SNDRV_SEQ_EVENT_LENGTH_VARIABLE; ev->type = SNDRV_SEQ_EVENT_SYSEX; ev->data.ext.len = dev->read; ev->data.ext.ptr = dev->buf; if (c != MIDI_CMD_COMMON_SYSEX_END) dev->read = 0; /* continue to parse */ else reset_encode(dev); /* all parsed */ rc = true; } } return rc; } EXPORT_SYMBOL(snd_midi_event_encode_byte); /* encode note event */ static void note_event(struct snd_midi_event *dev, struct snd_seq_event *ev) { ev->data.note.channel = dev->buf[0] & 0x0f; ev->data.note.note = dev->buf[1]; ev->data.note.velocity = dev->buf[2]; } /* encode one parameter controls */ static void one_param_ctrl_event(struct snd_midi_event *dev, struct snd_seq_event *ev) { ev->data.control.channel = dev->buf[0] & 0x0f; ev->data.control.value = dev->buf[1]; } /* encode pitch wheel change */ static void pitchbend_ctrl_event(struct snd_midi_event *dev, struct snd_seq_event *ev) { ev->data.control.channel = dev->buf[0] & 0x0f; ev->data.control.value = (int)dev->buf[2] * 128 + (int)dev->buf[1] - 8192; } /* encode midi control change */ static void two_param_ctrl_event(struct snd_midi_event *dev, struct snd_seq_event 
*ev) { ev->data.control.channel = dev->buf[0] & 0x0f; ev->data.control.param = dev->buf[1]; ev->data.control.value = dev->buf[2]; } /* encode one parameter value*/ static void one_param_event(struct snd_midi_event *dev, struct snd_seq_event *ev) { ev->data.control.value = dev->buf[1]; } /* encode song position */ static void songpos_event(struct snd_midi_event *dev, struct snd_seq_event *ev) { ev->data.control.value = (int)dev->buf[2] * 128 + (int)dev->buf[1]; } /* * decode from a sequencer event to midi bytes * return the size of decoded midi events */ long snd_midi_event_decode(struct snd_midi_event *dev, unsigned char *buf, long count, struct snd_seq_event *ev) { unsigned int cmd, type; if (ev->type == SNDRV_SEQ_EVENT_NONE) return -ENOENT; for (type = 0; type < ARRAY_SIZE(status_event); type++) { if (ev->type == status_event[type].event) goto __found; } for (type = 0; type < ARRAY_SIZE(extra_event); type++) { if (ev->type == extra_event[type].event) return extra_event[type].decode(dev, buf, count, ev); } return -ENOENT; __found: if (type >= ST_SPECIAL) cmd = 0xf0 + (type - ST_SPECIAL); else /* data.note.channel and data.control.channel is identical */ cmd = 0x80 | (type << 4) | (ev->data.note.channel & 0x0f); if (cmd == MIDI_CMD_COMMON_SYSEX) { snd_midi_event_reset_decode(dev); return snd_seq_expand_var_event(ev, count, buf, 1, 0); } else { int qlen; unsigned char xbuf[4]; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if ((cmd & 0xf0) == 0xf0 || dev->lastcmd != cmd || dev->nostat) { dev->lastcmd = cmd; spin_unlock_irqrestore(&dev->lock, flags); xbuf[0] = cmd; if (status_event[type].decode) status_event[type].decode(ev, xbuf + 1); qlen = status_event[type].qlen + 1; } else { spin_unlock_irqrestore(&dev->lock, flags); if (status_event[type].decode) status_event[type].decode(ev, xbuf + 0); qlen = status_event[type].qlen; } if (count < qlen) return -ENOMEM; memcpy(buf, xbuf, qlen); return qlen; } } EXPORT_SYMBOL(snd_midi_event_decode); /* decode note event */ static void note_decode(struct snd_seq_event *ev, unsigned char *buf) { buf[0] = ev->data.note.note & 0x7f; buf[1] = ev->data.note.velocity & 0x7f; } /* decode one parameter controls */ static void one_param_decode(struct snd_seq_event *ev, unsigned char *buf) { buf[0] = ev->data.control.value & 0x7f; } /* decode pitch wheel change */ static void pitchbend_decode(struct snd_seq_event *ev, unsigned char *buf) { int value = ev->data.control.value + 8192; buf[0] = value & 0x7f; buf[1] = (value >> 7) & 0x7f; } /* decode midi control change */ static void two_param_decode(struct snd_seq_event *ev, unsigned char *buf) { buf[0] = ev->data.control.param & 0x7f; buf[1] = ev->data.control.value & 0x7f; } /* decode song position */ static void songpos_decode(struct snd_seq_event *ev, unsigned char *buf) { buf[0] = ev->data.control.value & 0x7f; buf[1] = (ev->data.control.value >> 7) & 0x7f; } /* decode 14bit control */ static int extra_decode_ctrl14(struct snd_midi_event *dev, unsigned char *buf, int count, struct snd_seq_event *ev) { unsigned char cmd; int idx = 0; cmd = MIDI_CMD_CONTROL|(ev->data.control.channel & 0x0f); if (ev->data.control.param < 0x20) { if (count < 4) return -ENOMEM; if (dev->nostat && count < 6) return -ENOMEM; if (cmd != dev->lastcmd || dev->nostat) { if (count < 5) return -ENOMEM; buf[idx++] = dev->lastcmd = cmd; } buf[idx++] = ev->data.control.param; buf[idx++] = (ev->data.control.value >> 7) & 0x7f; if (dev->nostat) buf[idx++] = cmd; buf[idx++] = ev->data.control.param + 0x20; buf[idx++] = 
ev->data.control.value & 0x7f; } else { if (count < 2) return -ENOMEM; if (cmd != dev->lastcmd || dev->nostat) { if (count < 3) return -ENOMEM; buf[idx++] = dev->lastcmd = cmd; } buf[idx++] = ev->data.control.param & 0x7f; buf[idx++] = ev->data.control.value & 0x7f; } return idx; } /* decode reg/nonreg param */ static int extra_decode_xrpn(struct snd_midi_event *dev, unsigned char *buf, int count, struct snd_seq_event *ev) { unsigned char cmd; const char *cbytes; static const char cbytes_nrpn[4] = { MIDI_CTL_NONREG_PARM_NUM_MSB, MIDI_CTL_NONREG_PARM_NUM_LSB, MIDI_CTL_MSB_DATA_ENTRY, MIDI_CTL_LSB_DATA_ENTRY }; static const char cbytes_rpn[4] = { MIDI_CTL_REGIST_PARM_NUM_MSB, MIDI_CTL_REGIST_PARM_NUM_LSB, MIDI_CTL_MSB_DATA_ENTRY, MIDI_CTL_LSB_DATA_ENTRY }; unsigned char bytes[4]; int idx = 0, i; if (count < 8) return -ENOMEM; if (dev->nostat && count < 12) return -ENOMEM; cmd = MIDI_CMD_CONTROL|(ev->data.control.channel & 0x0f); bytes[0] = (ev->data.control.param & 0x3f80) >> 7; bytes[1] = ev->data.control.param & 0x007f; bytes[2] = (ev->data.control.value & 0x3f80) >> 7; bytes[3] = ev->data.control.value & 0x007f; if (cmd != dev->lastcmd && !dev->nostat) { if (count < 9) return -ENOMEM; buf[idx++] = dev->lastcmd = cmd; } cbytes = ev->type == SNDRV_SEQ_EVENT_NONREGPARAM ? cbytes_nrpn : cbytes_rpn; for (i = 0; i < 4; i++) { if (dev->nostat) buf[idx++] = dev->lastcmd = cmd; buf[idx++] = cbytes[i]; buf[idx++] = bytes[i]; } return idx; }
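The coder above is used by feeding raw bytes into snd_midi_event_encode_byte() until it reports a complete event, and by turning sequencer events back into bytes with snd_midi_event_decode(). The kernel-context sketch below shows that round trip for a single note-on; the demo function and values are illustrative, not taken from this file.

/* Kernel-context sketch (illustrative only): raw MIDI bytes ->
 * snd_seq_event -> raw MIDI bytes, using the API defined above. */
#include <linux/printk.h>
#include <sound/seq_kernel.h>
#include <sound/seq_midi_event.h>

static void midi_coder_demo(void)
{
	static const unsigned char note_on[] = { 0x90, 0x3c, 0x7f };	/* ch 0, middle C */
	unsigned char out[12];
	struct snd_seq_event ev = { };
	struct snd_midi_event *coder;
	long n;
	int i;

	if (snd_midi_event_new(64, &coder) < 0)		/* 64-byte sysex buffer */
		return;

	/* feed bytes until a complete event has been assembled */
	for (i = 0; i < 3; i++)
		if (snd_midi_event_encode_byte(coder, note_on[i], &ev))
			break;

	/* decode the sequencer event back into a MIDI byte stream */
	n = snd_midi_event_decode(coder, out, sizeof(out), &ev);
	if (n > 0)
		pr_info("decoded %ld MIDI byte(s)\n", n);

	snd_midi_event_free(coder);
}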
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM notifier

#if !defined(_TRACE_NOTIFIERS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_NOTIFIERS_H

#include <linux/tracepoint.h>

DECLARE_EVENT_CLASS(notifier_info,

	TP_PROTO(void *cb),

	TP_ARGS(cb),

	TP_STRUCT__entry(
		__field(void *, cb)
	),

	TP_fast_assign(
		__entry->cb = cb;
	),

	TP_printk("%ps", __entry->cb)
);

/*
 * notifier_register - called upon notifier callback registration
 *
 * @cb: callback pointer
 *
 */
DEFINE_EVENT(notifier_info, notifier_register,

	TP_PROTO(void *cb),

	TP_ARGS(cb)
);

/*
 * notifier_unregister - called upon notifier callback unregistration
 *
 * @cb: callback pointer
 *
 */
DEFINE_EVENT(notifier_info, notifier_unregister,

	TP_PROTO(void *cb),

	TP_ARGS(cb)
);

/*
 * notifier_run - called upon notifier callback execution
 *
 * @cb: callback pointer
 *
 */
DEFINE_EVENT(notifier_info, notifier_run,

	TP_PROTO(void *cb),

	TP_ARGS(cb)
);

#endif /* _TRACE_NOTIFIERS_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
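These DEFINE_EVENT() entries generate trace_notifier_register(), trace_notifier_unregister() and trace_notifier_run() for callers to emit. The kernel-context sketch below shows the usual pattern, with CREATE_TRACE_POINTS defined in exactly one translation unit; the demo_register() wrapper is illustrative, not the actual notifier-chain code.

/* Kernel-context sketch (illustrative only): one .c file defines
 * CREATE_TRACE_POINTS so the trace_notifier_*() bodies are instantiated;
 * other callers just include the header. */
#define CREATE_TRACE_POINTS
#include <trace/events/notifier.h>

#include <linux/notifier.h>

static int demo_register(struct blocking_notifier_head *nh,
			 struct notifier_block *nb)
{
	int ret = blocking_notifier_chain_register(nh, nb);

	/* emit the tracepoint with the callback pointer, as the event expects */
	if (!ret)
		trace_notifier_register((void *)nb->notifier_call);
	return ret;
}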
/* * Copyright (c) 2012 Mellanox Technologies. - All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ #include <linux/netdevice.h> #include <linux/if_arp.h> /* For ARPHRD_xxx */ #include <net/rtnetlink.h> #include "ipoib.h" static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = { [IFLA_IPOIB_PKEY] = { .type = NLA_U16 }, [IFLA_IPOIB_MODE] = { .type = NLA_U16 }, [IFLA_IPOIB_UMCAST] = { .type = NLA_U16 }, }; static unsigned int ipoib_get_max_num_queues(void) { return min_t(unsigned int, num_possible_cpus(), 128); } static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); u16 val; if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey)) goto nla_put_failure; val = test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); if (nla_put_u16(skb, IFLA_IPOIB_MODE, val)) goto nla_put_failure; val = test_bit(IPOIB_FLAG_UMCAST, &priv->flags); if (nla_put_u16(skb, IFLA_IPOIB_UMCAST, val)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static int ipoib_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { u16 mode, umcast; int ret = 0; if (data[IFLA_IPOIB_MODE]) { mode = nla_get_u16(data[IFLA_IPOIB_MODE]); if (mode == IPOIB_MODE_DATAGRAM) ret = ipoib_set_mode(dev, "datagram\n"); else if (mode == IPOIB_MODE_CONNECTED) ret = ipoib_set_mode(dev, "connected\n"); else ret = -EINVAL; if (ret < 0) goto out_err; } if (data[IFLA_IPOIB_UMCAST]) { umcast = nla_get_u16(data[IFLA_IPOIB_UMCAST]); ipoib_set_umcast(dev, umcast); } out_err: return ret; } static int ipoib_new_child_link(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct net *link_net = rtnl_newlink_link_net(params); struct nlattr **data = params->data; struct nlattr **tb = params->tb; struct net_device *pdev; struct ipoib_dev_priv *ppriv; u16 child_pkey; int err; if (!tb[IFLA_LINK]) return -EINVAL; pdev = __dev_get_by_index(link_net, nla_get_u32(tb[IFLA_LINK])); if (!pdev || pdev->type != ARPHRD_INFINIBAND) return -ENODEV; ppriv = ipoib_priv(pdev); if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) { ipoib_warn(ppriv, "child creation disallowed for child devices\n"); return -EINVAL; } if (!data || !data[IFLA_IPOIB_PKEY]) { ipoib_dbg(ppriv, "no pkey specified, using parent pkey\n"); child_pkey = ppriv->pkey; } else child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]); err = ipoib_intf_init(ppriv->ca, ppriv->port, dev->name, dev); if (err) { ipoib_warn(ppriv, "failed to initialize pkey device\n"); return err; } err = __ipoib_vlan_add(ppriv, ipoib_priv(dev), child_pkey, IPOIB_RTNL_CHILD); if (err) return err; if (data) { err = ipoib_changelink(dev, tb, data, extack); if (err) { unregister_netdevice(dev); return err; } } return 0; } static void ipoib_del_child_link(struct net_device *dev, struct list_head *head) { struct ipoib_dev_priv *priv = ipoib_priv(dev); if (!priv->parent) return; unregister_netdevice_queue(dev, head); } static size_t ipoib_get_size(const struct net_device *dev) { return nla_total_size(2) + /* IFLA_IPOIB_PKEY */ nla_total_size(2) + /* IFLA_IPOIB_MODE */ nla_total_size(2); /* IFLA_IPOIB_UMCAST */ } static struct rtnl_link_ops ipoib_link_ops __read_mostly = { .kind = "ipoib", .netns_refund = true, .maxtype = IFLA_IPOIB_MAX, .policy = ipoib_policy, .priv_size = sizeof(struct ipoib_dev_priv), .setup = ipoib_setup_common, .newlink = ipoib_new_child_link, .dellink = ipoib_del_child_link, .changelink = ipoib_changelink, .get_size = ipoib_get_size, .fill_info = ipoib_fill_info, .get_num_rx_queues = ipoib_get_max_num_queues, .get_num_tx_queues = 
ipoib_get_max_num_queues, }; struct rtnl_link_ops *ipoib_get_link_ops(void) { return &ipoib_link_ops; } int __init ipoib_netlink_init(void) { return rtnl_link_register(&ipoib_link_ops); } void __exit ipoib_netlink_fini(void) { rtnl_link_unregister(&ipoib_link_ops); } MODULE_ALIAS_RTNL_LINK("ipoib");
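In the rtnl_link_ops above, ipoib_get_size() must account for exactly the attributes that ipoib_fill_info() emits: each u16 attribute contributes nla_total_size(2) bytes (payload plus netlink attribute header and padding), which is why it returns three such terms. A minimal sketch of that pairing for a single attribute follows; MYDRV_ATTR_FOO and the mydrv_* functions are hypothetical, shown only to illustrate the pattern:

#include <net/netlink.h>

/* Hypothetical attribute id, for illustration only. */
enum { MYDRV_ATTR_UNSPEC, MYDRV_ATTR_FOO, __MYDRV_ATTR_MAX };

static size_t mydrv_get_size(const struct net_device *dev)
{
	return nla_total_size(2);	/* MYDRV_ATTR_FOO (u16) */
}

static int mydrv_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	/* Must stay in sync with mydrv_get_size(), or the skb runs short. */
	if (nla_put_u16(skb, MYDRV_ATTR_FOO, 0x1234))
		return -EMSGSIZE;
	return 0;
}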
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _XFRM_HASH_H #define _XFRM_HASH_H #include <linux/xfrm.h> #include <linux/socket.h> #include <linux/jhash.h> static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr) { return ntohl(addr->a4); } static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr) { return jhash2((__force u32 *)addr->a6, 4, 0); } static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr) { u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4; return ntohl((__force __be32)sum); } static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr) { return __xfrm6_addr_hash(daddr) ^ __xfrm6_addr_hash(saddr); } static inline u32 __bits2mask32(__u8 bits) { u32 mask32 = 0xffffffff; if (bits == 0) mask32 = 0; else if (bits < 32) mask32 <<= (32 - bits); return mask32; } static inline unsigned int __xfrm4_dpref_spref_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr, __u8 dbits, __u8 sbits) { return jhash_2words(ntohl(daddr->a4) & __bits2mask32(dbits), ntohl(saddr->a4) & __bits2mask32(sbits), 0); } static inline unsigned int __xfrm6_pref_hash(const xfrm_address_t *addr, __u8 prefixlen) { unsigned int pdw; unsigned int pbi; u32 initval = 0; pdw = prefixlen >> 5; /* num of whole u32 in prefix */ pbi = prefixlen & 0x1f; /* num of bits in incomplete u32 in prefix */ if (pbi) { __be32 mask; mask = htonl((0xffffffff) << (32 - pbi)); initval = (__force u32)(addr->a6[pdw] & mask); } return jhash2((__force u32 *)addr->a6, pdw, initval); } static inline unsigned int __xfrm6_dpref_spref_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr, __u8 dbits, __u8 sbits) { return __xfrm6_pref_hash(daddr, dbits) ^ __xfrm6_pref_hash(saddr, sbits); } static inline unsigned int __xfrm_dst_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr, u32 reqid, unsigned short family, unsigned int hmask) { unsigned int h = family ^ reqid; switch (family) { case AF_INET: h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); break; case AF_INET6: h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); break; } return (h ^ (h >> 16)) & hmask; } static inline unsigned int __xfrm_src_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family, unsigned int hmask) { unsigned int h = family; switch (family) { case AF_INET: h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); break; case AF_INET6: h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); break; } return (h ^ (h >> 16)) & hmask; } static inline unsigned int __xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family, unsigned int hmask) { unsigned int h = (__force u32)spi ^ proto; switch (family) { case
AF_INET: h ^= __xfrm4_addr_hash(daddr); break; case AF_INET6: h ^= __xfrm6_addr_hash(daddr); break; } return (h ^ (h >> 10) ^ (h >> 20)) & hmask; } static inline unsigned int __xfrm_seq_hash(u32 seq, unsigned int hmask) { unsigned int h = seq; return (h ^ (h >> 10) ^ (h >> 20)) & hmask; } static inline unsigned int __idx_hash(u32 index, unsigned int hmask) { return (index ^ (index >> 8)) & hmask; } static inline unsigned int __sel_hash(const struct xfrm_selector *sel, unsigned short family, unsigned int hmask, u8 dbits, u8 sbits) { const xfrm_address_t *daddr = &sel->daddr; const xfrm_address_t *saddr = &sel->saddr; unsigned int h = 0; switch (family) { case AF_INET: if (sel->prefixlen_d < dbits || sel->prefixlen_s < sbits) return hmask + 1; h = __xfrm4_dpref_spref_hash(daddr, saddr, dbits, sbits); break; case AF_INET6: if (sel->prefixlen_d < dbits || sel->prefixlen_s < sbits) return hmask + 1; h = __xfrm6_dpref_spref_hash(daddr, saddr, dbits, sbits); break; } h ^= (h >> 16); return h & hmask; } static inline unsigned int __addr_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family, unsigned int hmask, u8 dbits, u8 sbits) { unsigned int h = 0; switch (family) { case AF_INET: h = __xfrm4_dpref_spref_hash(daddr, saddr, dbits, sbits); break; case AF_INET6: h = __xfrm6_dpref_spref_hash(daddr, saddr, dbits, sbits); break; } h ^= (h >> 16); return h & hmask; } struct hlist_head *xfrm_hash_alloc(unsigned int sz); void xfrm_hash_free(struct hlist_head *n, unsigned int sz); #endif /* _XFRM_HASH_H */
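The prefix-based hashes above rely on __bits2mask32(bits), which builds a mask with the top "bits" bits set, so __xfrm4_dpref_spref_hash() only hashes the network prefix of each address: two addresses sharing the same /dbits and /sbits prefixes land in the same bucket. Below is a standalone userspace sketch (not kernel code) reproducing that construction with a worked /24 example:

#include <stdio.h>
#include <stdint.h>

/* Same construction as __bits2mask32() above, in plain C for illustration. */
static uint32_t bits2mask32(uint8_t bits)
{
	uint32_t mask = 0xffffffff;

	if (bits == 0)
		mask = 0;
	else if (bits < 32)
		mask <<= (32 - bits);
	return mask;
}

int main(void)
{
	/* 10.0.5.7 and 10.0.5.200 (host byte order) share a /24 prefix. */
	uint32_t a = 0x0a000507, b = 0x0a0005c8, m = bits2mask32(24);

	/* mask=0xffffff00, and the masked addresses compare equal (prints 1). */
	printf("mask=%#010x same-prefix=%d\n", m, (a & m) == (b & m));
	return 0;
}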
// SPDX-License-Identifier: GPL-2.0 #include <linux/pagewalk.h> #include <linux/mm_inline.h> #include <linux/hugetlb.h> #include <linux/huge_mm.h> #include <linux/mount.h> #include <linux/ksm.h> #include <linux/seq_file.h> #include <linux/highmem.h> #include <linux/ptrace.h> #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/mempolicy.h> #include <linux/rmap.h> #include <linux/swap.h> #include <linux/sched/mm.h> #include <linux/swapops.h> #include <linux/mmu_notifier.h> #include <linux/page_idle.h> #include <linux/shmem_fs.h> #include <linux/uaccess.h> #include <linux/pkeys.h> #include <linux/minmax.h> #include <linux/overflow.h> #include <linux/buildid.h> #include <asm/elf.h> #include <asm/tlb.h> #include <asm/tlbflush.h> #include "internal.h" #define SEQ_PUT_DEC(str, val) \ seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8) void task_mem(struct seq_file *m, struct mm_struct *mm) { unsigned long text, lib, swap, anon, file, shmem; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; anon = get_mm_counter_sum(mm, MM_ANONPAGES); file = get_mm_counter_sum(mm, MM_FILEPAGES); shmem = get_mm_counter_sum(mm, MM_SHMEMPAGES); /* * Note: to minimize their overhead, mm maintains hiwater_vm and * hiwater_rss only when about to *lower* total_vm or rss. Any * collector of these hiwater stats must therefore get total_vm * and rss too, which will usually be the higher. Barriers? not * worth the effort, such snapshots can always be inconsistent.
*/ hiwater_vm = total_vm = mm->total_vm; if (hiwater_vm < mm->hiwater_vm) hiwater_vm = mm->hiwater_vm; hiwater_rss = total_rss = anon + file + shmem; if (hiwater_rss < mm->hiwater_rss) hiwater_rss = mm->hiwater_rss; /* split executable areas between text and lib */ text = PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK); text = min(text, mm->exec_vm << PAGE_SHIFT); lib = (mm->exec_vm << PAGE_SHIFT) - text; swap = get_mm_counter_sum(mm, MM_SWAPENTS); SEQ_PUT_DEC("VmPeak:\t", hiwater_vm); SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm); SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm); SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm)); SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss); SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss); SEQ_PUT_DEC(" kB\nRssAnon:\t", anon); SEQ_PUT_DEC(" kB\nRssFile:\t", file); SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem); SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm); SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm); seq_put_decimal_ull_width(m, " kB\nVmExe:\t", text >> 10, 8); seq_put_decimal_ull_width(m, " kB\nVmLib:\t", lib >> 10, 8); seq_put_decimal_ull_width(m, " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8); SEQ_PUT_DEC(" kB\nVmSwap:\t", swap); seq_puts(m, " kB\n"); hugetlb_report_usage(m, mm); } #undef SEQ_PUT_DEC unsigned long task_vsize(struct mm_struct *mm) { return PAGE_SIZE * mm->total_vm; } unsigned long task_statm(struct mm_struct *mm, unsigned long *shared, unsigned long *text, unsigned long *data, unsigned long *resident) { *shared = get_mm_counter_sum(mm, MM_FILEPAGES) + get_mm_counter_sum(mm, MM_SHMEMPAGES); *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT; *data = mm->data_vm + mm->stack_vm; *resident = *shared + get_mm_counter_sum(mm, MM_ANONPAGES); return mm->total_vm; } #ifdef CONFIG_NUMA /* * Save get_task_policy() for show_numa_map(). */ static void hold_task_mempolicy(struct proc_maps_private *priv) { struct task_struct *task = priv->task; task_lock(task); priv->task_mempolicy = get_task_policy(task); mpol_get(priv->task_mempolicy); task_unlock(task); } static void release_task_mempolicy(struct proc_maps_private *priv) { mpol_put(priv->task_mempolicy); } #else static void hold_task_mempolicy(struct proc_maps_private *priv) { } static void release_task_mempolicy(struct proc_maps_private *priv) { } #endif static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv, loff_t *ppos) { struct vm_area_struct *vma = vma_next(&priv->iter); if (vma) { *ppos = vma->vm_start; } else { *ppos = -2UL; vma = get_gate_vma(priv->mm); } return vma; } static void *m_start(struct seq_file *m, loff_t *ppos) { struct proc_maps_private *priv = m->private; unsigned long last_addr = *ppos; struct mm_struct *mm; /* See m_next(). Zero at the start or after lseek. 
*/ if (last_addr == -1UL) return NULL; priv->task = get_proc_task(priv->inode); if (!priv->task) return ERR_PTR(-ESRCH); mm = priv->mm; if (!mm || !mmget_not_zero(mm)) { put_task_struct(priv->task); priv->task = NULL; return NULL; } if (mmap_read_lock_killable(mm)) { mmput(mm); put_task_struct(priv->task); priv->task = NULL; return ERR_PTR(-EINTR); } vma_iter_init(&priv->iter, mm, last_addr); hold_task_mempolicy(priv); if (last_addr == -2UL) return get_gate_vma(mm); return proc_get_vma(priv, ppos); } static void *m_next(struct seq_file *m, void *v, loff_t *ppos) { if (*ppos == -2UL) { *ppos = -1UL; return NULL; } return proc_get_vma(m->private, ppos); } static void m_stop(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; struct mm_struct *mm = priv->mm; if (!priv->task) return; release_task_mempolicy(priv); mmap_read_unlock(mm); mmput(mm); put_task_struct(priv->task); priv->task = NULL; } static int proc_maps_open(struct inode *inode, struct file *file, const struct seq_operations *ops, int psize) { struct proc_maps_private *priv = __seq_open_private(file, ops, psize); if (!priv) return -ENOMEM; priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); if (IS_ERR_OR_NULL(priv->mm)) { int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH; seq_release_private(inode, file); return err; } return 0; } static int proc_map_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; struct proc_maps_private *priv = seq->private; if (priv->mm) mmdrop(priv->mm); return seq_release_private(inode, file); } static int do_maps_open(struct inode *inode, struct file *file, const struct seq_operations *ops) { return proc_maps_open(inode, file, ops, sizeof(struct proc_maps_private)); } static void get_vma_name(struct vm_area_struct *vma, const struct path **path, const char **name, const char **name_fmt) { struct anon_vma_name *anon_name = vma->vm_mm ? anon_vma_name(vma) : NULL; *name = NULL; *path = NULL; *name_fmt = NULL; /* * Print the dentry name for named mappings, and a * special [heap] marker for the heap: */ if (vma->vm_file) { /* * If user named this anon shared memory via * prctl(PR_SET_VMA ..., use the provided name. */ if (anon_name) { *name_fmt = "[anon_shmem:%s]"; *name = anon_name->name; } else { *path = file_user_path(vma->vm_file); } return; } if (vma->vm_ops && vma->vm_ops->name) { *name = vma->vm_ops->name(vma); if (*name) return; } *name = arch_vma_name(vma); if (*name) return; if (!vma->vm_mm) { *name = "[vdso]"; return; } if (vma_is_initial_heap(vma)) { *name = "[heap]"; return; } if (vma_is_initial_stack(vma)) { *name = "[stack]"; return; } if (anon_name) { *name_fmt = "[anon:%s]"; *name = anon_name->name; return; } } static void show_vma_header_prefix(struct seq_file *m, unsigned long start, unsigned long end, vm_flags_t flags, unsigned long long pgoff, dev_t dev, unsigned long ino) { seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_put_hex_ll(m, NULL, start, 8); seq_put_hex_ll(m, "-", end, 8); seq_putc(m, ' '); seq_putc(m, flags & VM_READ ? 'r' : '-'); seq_putc(m, flags & VM_WRITE ? 'w' : '-'); seq_putc(m, flags & VM_EXEC ? 'x' : '-'); seq_putc(m, flags & VM_MAYSHARE ? 
's' : 'p'); seq_put_hex_ll(m, " ", pgoff, 8); seq_put_hex_ll(m, " ", MAJOR(dev), 2); seq_put_hex_ll(m, ":", MINOR(dev), 2); seq_put_decimal_ull(m, " ", ino); seq_putc(m, ' '); } static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) { const struct path *path; const char *name_fmt, *name; vm_flags_t flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; unsigned long start, end; dev_t dev = 0; if (vma->vm_file) { const struct inode *inode = file_user_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; } start = vma->vm_start; end = vma->vm_end; show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino); get_vma_name(vma, &path, &name, &name_fmt); if (path) { seq_pad(m, ' '); seq_path(m, path, "\n"); } else if (name_fmt) { seq_pad(m, ' '); seq_printf(m, name_fmt, name); } else if (name) { seq_pad(m, ' '); seq_puts(m, name); } seq_putc(m, '\n'); } static int show_map(struct seq_file *m, void *v) { show_map_vma(m, v); return 0; } static const struct seq_operations proc_pid_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_map }; static int pid_maps_open(struct inode *inode, struct file *file) { return do_maps_open(inode, file, &proc_pid_maps_op); } #define PROCMAP_QUERY_VMA_FLAGS ( \ PROCMAP_QUERY_VMA_READABLE | \ PROCMAP_QUERY_VMA_WRITABLE | \ PROCMAP_QUERY_VMA_EXECUTABLE | \ PROCMAP_QUERY_VMA_SHARED \ ) #define PROCMAP_QUERY_VALID_FLAGS_MASK ( \ PROCMAP_QUERY_COVERING_OR_NEXT_VMA | \ PROCMAP_QUERY_FILE_BACKED_VMA | \ PROCMAP_QUERY_VMA_FLAGS \ ) static int query_vma_setup(struct mm_struct *mm) { return mmap_read_lock_killable(mm); } static void query_vma_teardown(struct mm_struct *mm, struct vm_area_struct *vma) { mmap_read_unlock(mm); } static struct vm_area_struct *query_vma_find_by_addr(struct mm_struct *mm, unsigned long addr) { return find_vma(mm, addr); } static struct vm_area_struct *query_matching_vma(struct mm_struct *mm, unsigned long addr, u32 flags) { struct vm_area_struct *vma; next_vma: vma = query_vma_find_by_addr(mm, addr); if (!vma) goto no_vma; /* user requested only file-backed VMA, keep iterating */ if ((flags & PROCMAP_QUERY_FILE_BACKED_VMA) && !vma->vm_file) goto skip_vma; /* VMA permissions should satisfy query flags */ if (flags & PROCMAP_QUERY_VMA_FLAGS) { u32 perm = 0; if (flags & PROCMAP_QUERY_VMA_READABLE) perm |= VM_READ; if (flags & PROCMAP_QUERY_VMA_WRITABLE) perm |= VM_WRITE; if (flags & PROCMAP_QUERY_VMA_EXECUTABLE) perm |= VM_EXEC; if (flags & PROCMAP_QUERY_VMA_SHARED) perm |= VM_MAYSHARE; if ((vma->vm_flags & perm) != perm) goto skip_vma; } /* found covering VMA or user is OK with the matching next VMA */ if ((flags & PROCMAP_QUERY_COVERING_OR_NEXT_VMA) || vma->vm_start <= addr) return vma; skip_vma: /* * If the user needs closest matching VMA, keep iterating. 
*/ addr = vma->vm_end; if (flags & PROCMAP_QUERY_COVERING_OR_NEXT_VMA) goto next_vma; no_vma: return ERR_PTR(-ENOENT); } static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg) { struct procmap_query karg; struct vm_area_struct *vma; struct mm_struct *mm; const char *name = NULL; char build_id_buf[BUILD_ID_SIZE_MAX], *name_buf = NULL; __u64 usize; int err; if (copy_from_user(&usize, (void __user *)uarg, sizeof(usize))) return -EFAULT; /* argument struct can never be that large, reject abuse */ if (usize > PAGE_SIZE) return -E2BIG; /* argument struct should have at least query_flags and query_addr fields */ if (usize < offsetofend(struct procmap_query, query_addr)) return -EINVAL; err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize); if (err) return err; /* reject unknown flags */ if (karg.query_flags & ~PROCMAP_QUERY_VALID_FLAGS_MASK) return -EINVAL; /* either both buffer address and size are set, or both should be zero */ if (!!karg.vma_name_size != !!karg.vma_name_addr) return -EINVAL; if (!!karg.build_id_size != !!karg.build_id_addr) return -EINVAL; mm = priv->mm; if (!mm || !mmget_not_zero(mm)) return -ESRCH; err = query_vma_setup(mm); if (err) { mmput(mm); return err; } vma = query_matching_vma(mm, karg.query_addr, karg.query_flags); if (IS_ERR(vma)) { err = PTR_ERR(vma); vma = NULL; goto out; } karg.vma_start = vma->vm_start; karg.vma_end = vma->vm_end; karg.vma_flags = 0; if (vma->vm_flags & VM_READ) karg.vma_flags |= PROCMAP_QUERY_VMA_READABLE; if (vma->vm_flags & VM_WRITE) karg.vma_flags |= PROCMAP_QUERY_VMA_WRITABLE; if (vma->vm_flags & VM_EXEC) karg.vma_flags |= PROCMAP_QUERY_VMA_EXECUTABLE; if (vma->vm_flags & VM_MAYSHARE) karg.vma_flags |= PROCMAP_QUERY_VMA_SHARED; karg.vma_page_size = vma_kernel_pagesize(vma); if (vma->vm_file) { const struct inode *inode = file_user_inode(vma->vm_file); karg.vma_offset = ((__u64)vma->vm_pgoff) << PAGE_SHIFT; karg.dev_major = MAJOR(inode->i_sb->s_dev); karg.dev_minor = MINOR(inode->i_sb->s_dev); karg.inode = inode->i_ino; } else { karg.vma_offset = 0; karg.dev_major = 0; karg.dev_minor = 0; karg.inode = 0; } if (karg.build_id_size) { __u32 build_id_sz; err = build_id_parse(vma, build_id_buf, &build_id_sz); if (err) { karg.build_id_size = 0; } else { if (karg.build_id_size < build_id_sz) { err = -ENAMETOOLONG; goto out; } karg.build_id_size = build_id_sz; } } if (karg.vma_name_size) { size_t name_buf_sz = min_t(size_t, PATH_MAX, karg.vma_name_size); const struct path *path; const char *name_fmt; size_t name_sz = 0; get_vma_name(vma, &path, &name, &name_fmt); if (path || name_fmt || name) { name_buf = kmalloc(name_buf_sz, GFP_KERNEL); if (!name_buf) { err = -ENOMEM; goto out; } } if (path) { name = d_path(path, name_buf, name_buf_sz); if (IS_ERR(name)) { err = PTR_ERR(name); goto out; } name_sz = name_buf + name_buf_sz - name; } else if (name || name_fmt) { name_sz = 1 + snprintf(name_buf, name_buf_sz, name_fmt ?: "%s", name); name = name_buf; } if (name_sz > name_buf_sz) { err = -ENAMETOOLONG; goto out; } karg.vma_name_size = name_sz; } /* unlock vma or mmap_lock, and put mm_struct before copying data to user */ query_vma_teardown(mm, vma); mmput(mm); if (karg.vma_name_size && copy_to_user(u64_to_user_ptr(karg.vma_name_addr), name, karg.vma_name_size)) { kfree(name_buf); return -EFAULT; } kfree(name_buf); if (karg.build_id_size && copy_to_user(u64_to_user_ptr(karg.build_id_addr), build_id_buf, karg.build_id_size)) return -EFAULT; if (copy_to_user(uarg, &karg, min_t(size_t, sizeof(karg), usize))) return -EFAULT; 
return 0; out: query_vma_teardown(mm, vma); mmput(mm); kfree(name_buf); return err; } static long procfs_procmap_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct seq_file *seq = file->private_data; struct proc_maps_private *priv = seq->private; switch (cmd) { case PROCMAP_QUERY: return do_procmap_query(priv, (void __user *)arg); default: return -ENOIOCTLCMD; } } const struct file_operations proc_pid_maps_operations = { .open = pid_maps_open, .read = seq_read, .llseek = seq_lseek, .release = proc_map_release, .unlocked_ioctl = procfs_procmap_ioctl, .compat_ioctl = compat_ptr_ioctl, }; /* * Proportional Set Size(PSS): my share of RSS. * * PSS of a process is the count of pages it has in memory, where each * page is divided by the number of processes sharing it. So if a * process has 1000 pages all to itself, and 1000 shared with one other * process, its PSS will be 1500. * * To keep (accumulated) division errors low, we adopt a 64bit * fixed-point pss counter to minimize division errors. So (pss >> * PSS_SHIFT) would be the real byte count. * * A shift of 12 before division means (assuming 4K page size): * - 1M 3-user-pages add up to 8KB errors; * - supports mapcount up to 2^24, or 16M; * - supports PSS up to 2^52 bytes, or 4PB. */ #define PSS_SHIFT 12 #ifdef CONFIG_PROC_PAGE_MONITOR struct mem_size_stats { unsigned long resident; unsigned long shared_clean; unsigned long shared_dirty; unsigned long private_clean; unsigned long private_dirty; unsigned long referenced; unsigned long anonymous; unsigned long lazyfree; unsigned long anonymous_thp; unsigned long shmem_thp; unsigned long file_thp; unsigned long swap; unsigned long shared_hugetlb; unsigned long private_hugetlb; unsigned long ksm; u64 pss; u64 pss_anon; u64 pss_file; u64 pss_shmem; u64 pss_dirty; u64 pss_locked; u64 swap_pss; }; static void smaps_page_accumulate(struct mem_size_stats *mss, struct folio *folio, unsigned long size, unsigned long pss, bool dirty, bool locked, bool private) { mss->pss += pss; if (folio_test_anon(folio)) mss->pss_anon += pss; else if (folio_test_swapbacked(folio)) mss->pss_shmem += pss; else mss->pss_file += pss; if (locked) mss->pss_locked += pss; if (dirty || folio_test_dirty(folio)) { mss->pss_dirty += pss; if (private) mss->private_dirty += size; else mss->shared_dirty += size; } else { if (private) mss->private_clean += size; else mss->shared_clean += size; } } static void smaps_account(struct mem_size_stats *mss, struct page *page, bool compound, bool young, bool dirty, bool locked, bool present) { struct folio *folio = page_folio(page); int i, nr = compound ? compound_nr(page) : 1; unsigned long size = nr * PAGE_SIZE; bool exclusive; int mapcount; /* * First accumulate quantities that depend only on |size| and the type * of the compound page. */ if (folio_test_anon(folio)) { mss->anonymous += size; if (!folio_test_swapbacked(folio) && !dirty && !folio_test_dirty(folio)) mss->lazyfree += size; } if (folio_test_ksm(folio)) mss->ksm += size; mss->resident += size; /* Accumulate the size in pages that have been accessed. */ if (young || folio_test_young(folio) || folio_test_referenced(folio)) mss->referenced += size; /* * Then accumulate quantities that may depend on sharing, or that may * differ page-by-page. * * refcount == 1 for present entries guarantees that the folio is mapped * exactly once. For large folios this implies that exactly one * PTE/PMD/... maps (a part of) this folio. 
* * Treat all non-present entries (where relying on the mapcount and * refcount doesn't make sense) as "maybe shared, but not sure how * often". We treat device private entries as being fake-present. * * Note that it would not be safe to read the mapcount especially for * pages referenced by migration entries, even with the PTL held. */ if (folio_ref_count(folio) == 1 || !present) { smaps_page_accumulate(mss, folio, size, size << PSS_SHIFT, dirty, locked, present); return; } if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) { mapcount = folio_average_page_mapcount(folio); exclusive = !folio_maybe_mapped_shared(folio); } /* * We obtain a snapshot of the mapcount. Without holding the folio lock * this snapshot can be slightly wrong as we cannot always read the * mapcount atomically. */ for (i = 0; i < nr; i++, page++) { unsigned long pss = PAGE_SIZE << PSS_SHIFT; if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) { mapcount = folio_precise_page_mapcount(folio, page); exclusive = mapcount < 2; } if (mapcount >= 2) pss /= mapcount; smaps_page_accumulate(mss, folio, PAGE_SIZE, pss, dirty, locked, exclusive); } } #ifdef CONFIG_SHMEM static int smaps_pte_hole(unsigned long addr, unsigned long end, __always_unused int depth, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; mss->swap += shmem_partial_swap_usage(walk->vma->vm_file->f_mapping, linear_page_index(vma, addr), linear_page_index(vma, end)); return 0; } #else #define smaps_pte_hole NULL #endif /* CONFIG_SHMEM */ static void smaps_pte_hole_lookup(unsigned long addr, struct mm_walk *walk) { #ifdef CONFIG_SHMEM if (walk->ops->pte_hole) { /* depth is not used */ smaps_pte_hole(addr, addr + PAGE_SIZE, 0, walk); } #endif } static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; bool present = false, young = false, dirty = false; pte_t ptent = ptep_get(pte); if (pte_present(ptent)) { page = vm_normal_page(vma, addr, ptent); young = pte_young(ptent); dirty = pte_dirty(ptent); present = true; } else if (is_swap_pte(ptent)) { swp_entry_t swpent = pte_to_swp_entry(ptent); if (!non_swap_entry(swpent)) { int mapcount; mss->swap += PAGE_SIZE; mapcount = swp_swapcount(swpent); if (mapcount >= 2) { u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT; do_div(pss_delta, mapcount); mss->swap_pss += pss_delta; } else { mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; } } else if (is_pfn_swap_entry(swpent)) { if (is_device_private_entry(swpent)) present = true; page = pfn_swap_entry_to_page(swpent); } } else { smaps_pte_hole_lookup(addr, walk); return; } if (!page) return; smaps_account(mss, page, false, young, dirty, locked, present); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; bool present = false; struct folio *folio; if (pmd_present(*pmd)) { page = vm_normal_page_pmd(vma, addr, *pmd); present = true; } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { swp_entry_t entry = pmd_to_swp_entry(*pmd); if (is_pfn_swap_entry(entry)) page = pfn_swap_entry_to_page(entry); } if (IS_ERR_OR_NULL(page)) return; folio = page_folio(page); if (folio_test_anon(folio)) mss->anonymous_thp += HPAGE_PMD_SIZE; else if 
(folio_test_swapbacked(folio)) mss->shmem_thp += HPAGE_PMD_SIZE; else if (folio_is_zone_device(folio)) /* pass */; else mss->file_thp += HPAGE_PMD_SIZE; smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked, present); } #else static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, struct mm_walk *walk) { } #endif static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; pte_t *pte; spinlock_t *ptl; ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { smaps_pmd_entry(pmd, addr, walk); spin_unlock(ptl); goto out; } pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (!pte) { walk->action = ACTION_AGAIN; return 0; } for (; addr != end; pte++, addr += PAGE_SIZE) smaps_pte_entry(pte, addr, walk); pte_unmap_unlock(pte - 1, ptl); out: cond_resched(); return 0; } static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) { /* * Don't forget to update Documentation/ on changes. * * The length of the second argument of mnemonics[] * needs to be 3 instead of previously set 2 * (i.e. from [BITS_PER_LONG][2] to [BITS_PER_LONG][3]) * to avoid spurious * -Werror=unterminated-string-initialization warning * with GCC 15 */ static const char mnemonics[BITS_PER_LONG][3] = { /* * In case if we meet a flag we don't know about. */ [0 ... (BITS_PER_LONG-1)] = "??", [ilog2(VM_READ)] = "rd", [ilog2(VM_WRITE)] = "wr", [ilog2(VM_EXEC)] = "ex", [ilog2(VM_SHARED)] = "sh", [ilog2(VM_MAYREAD)] = "mr", [ilog2(VM_MAYWRITE)] = "mw", [ilog2(VM_MAYEXEC)] = "me", [ilog2(VM_MAYSHARE)] = "ms", [ilog2(VM_GROWSDOWN)] = "gd", [ilog2(VM_PFNMAP)] = "pf", [ilog2(VM_LOCKED)] = "lo", [ilog2(VM_IO)] = "io", [ilog2(VM_SEQ_READ)] = "sr", [ilog2(VM_RAND_READ)] = "rr", [ilog2(VM_DONTCOPY)] = "dc", [ilog2(VM_DONTEXPAND)] = "de", [ilog2(VM_LOCKONFAULT)] = "lf", [ilog2(VM_ACCOUNT)] = "ac", [ilog2(VM_NORESERVE)] = "nr", [ilog2(VM_HUGETLB)] = "ht", [ilog2(VM_SYNC)] = "sf", [ilog2(VM_ARCH_1)] = "ar", [ilog2(VM_WIPEONFORK)] = "wf", [ilog2(VM_DONTDUMP)] = "dd", #ifdef CONFIG_ARM64_BTI [ilog2(VM_ARM64_BTI)] = "bt", #endif #ifdef CONFIG_MEM_SOFT_DIRTY [ilog2(VM_SOFTDIRTY)] = "sd", #endif [ilog2(VM_MIXEDMAP)] = "mm", [ilog2(VM_HUGEPAGE)] = "hg", [ilog2(VM_NOHUGEPAGE)] = "nh", [ilog2(VM_MERGEABLE)] = "mg", [ilog2(VM_UFFD_MISSING)]= "um", [ilog2(VM_UFFD_WP)] = "uw", #ifdef CONFIG_ARM64_MTE [ilog2(VM_MTE)] = "mt", [ilog2(VM_MTE_ALLOWED)] = "", #endif #ifdef CONFIG_ARCH_HAS_PKEYS /* These come out via ProtectionKey: */ [ilog2(VM_PKEY_BIT0)] = "", [ilog2(VM_PKEY_BIT1)] = "", [ilog2(VM_PKEY_BIT2)] = "", #if VM_PKEY_BIT3 [ilog2(VM_PKEY_BIT3)] = "", #endif #if VM_PKEY_BIT4 [ilog2(VM_PKEY_BIT4)] = "", #endif #endif /* CONFIG_ARCH_HAS_PKEYS */ #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR [ilog2(VM_UFFD_MINOR)] = "ui", #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ #ifdef CONFIG_ARCH_HAS_USER_SHADOW_STACK [ilog2(VM_SHADOW_STACK)] = "ss", #endif #if defined(CONFIG_64BIT) || defined(CONFIG_PPC32) [ilog2(VM_DROPPABLE)] = "dp", #endif #ifdef CONFIG_64BIT [ilog2(VM_SEALED)] = "sl", #endif }; size_t i; seq_puts(m, "VmFlags: "); for (i = 0; i < BITS_PER_LONG; i++) { if (!mnemonics[i][0]) continue; if (vma->vm_flags & (1UL << i)) seq_printf(m, "%s ", mnemonics[i]); } seq_putc(m, '\n'); } #ifdef CONFIG_HUGETLB_PAGE static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; pte_t ptent = 
huge_ptep_get(walk->mm, addr, pte); struct folio *folio = NULL; bool present = false; if (pte_present(ptent)) { folio = page_folio(pte_page(ptent)); present = true; } else if (is_swap_pte(ptent)) { swp_entry_t swpent = pte_to_swp_entry(ptent); if (is_pfn_swap_entry(swpent)) folio = pfn_swap_entry_folio(swpent); } if (folio) { /* We treat non-present entries as "maybe shared". */ if (!present || folio_maybe_mapped_shared(folio) || hugetlb_pmd_shared(pte)) mss->shared_hugetlb += huge_page_size(hstate_vma(vma)); else mss->private_hugetlb += huge_page_size(hstate_vma(vma)); } return 0; } #else #define smaps_hugetlb_range NULL #endif /* HUGETLB_PAGE */ static const struct mm_walk_ops smaps_walk_ops = { .pmd_entry = smaps_pte_range, .hugetlb_entry = smaps_hugetlb_range, .walk_lock = PGWALK_RDLOCK, }; static const struct mm_walk_ops smaps_shmem_walk_ops = { .pmd_entry = smaps_pte_range, .hugetlb_entry = smaps_hugetlb_range, .pte_hole = smaps_pte_hole, .walk_lock = PGWALK_RDLOCK, }; /* * Gather mem stats from @vma with the indicated beginning * address @start, and keep them in @mss. * * Use vm_start of @vma as the beginning address if @start is 0. */ static void smap_gather_stats(struct vm_area_struct *vma, struct mem_size_stats *mss, unsigned long start) { const struct mm_walk_ops *ops = &smaps_walk_ops; /* Invalid start */ if (start >= vma->vm_end) return; if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) { /* * For shared or readonly shmem mappings we know that all * swapped out pages belong to the shmem object, and we can * obtain the swap value much more efficiently. For private * writable mappings, we might have COW pages that are * not affected by the parent swapped out pages of the shmem * object, so we have to distinguish them during the page walk. * Unless we know that the shmem object (or the part mapped by * our VMA) has no swapped out pages at all. */ unsigned long shmem_swapped = shmem_swap_usage(vma); if (!start && (!shmem_swapped || (vma->vm_flags & VM_SHARED) || !(vma->vm_flags & VM_WRITE))) { mss->swap += shmem_swapped; } else { ops = &smaps_shmem_walk_ops; } } /* mmap_lock is held in m_start */ if (!start) walk_page_vma(vma, ops, mss); else walk_page_range(vma->vm_mm, start, vma->vm_end, ops, mss); } #define SEQ_PUT_DEC(str, val) \ seq_put_decimal_ull_width(m, str, (val) >> 10, 8) /* Show the contents common for smaps and smaps_rollup */ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss, bool rollup_mode) { SEQ_PUT_DEC("Rss: ", mss->resident); SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT); SEQ_PUT_DEC(" kB\nPss_Dirty: ", mss->pss_dirty >> PSS_SHIFT); if (rollup_mode) { /* * These are meaningful only for smaps_rollup, otherwise two of * them are zero, and the other one is the same as Pss. 
*/ SEQ_PUT_DEC(" kB\nPss_Anon: ", mss->pss_anon >> PSS_SHIFT); SEQ_PUT_DEC(" kB\nPss_File: ", mss->pss_file >> PSS_SHIFT); SEQ_PUT_DEC(" kB\nPss_Shmem: ", mss->pss_shmem >> PSS_SHIFT); } SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean); SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty); SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean); SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty); SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced); SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous); SEQ_PUT_DEC(" kB\nKSM: ", mss->ksm); SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree); SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp); SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp); SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp); SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb); seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ", mss->private_hugetlb >> 10, 7); SEQ_PUT_DEC(" kB\nSwap: ", mss->swap); SEQ_PUT_DEC(" kB\nSwapPss: ", mss->swap_pss >> PSS_SHIFT); SEQ_PUT_DEC(" kB\nLocked: ", mss->pss_locked >> PSS_SHIFT); seq_puts(m, " kB\n"); } static int show_smap(struct seq_file *m, void *v) { struct vm_area_struct *vma = v; struct mem_size_stats mss = {}; smap_gather_stats(vma, &mss, 0); show_map_vma(m, vma); SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start); SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma)); SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma)); seq_puts(m, " kB\n"); __show_smap(m, &mss, false); seq_printf(m, "THPeligible: %8u\n", !!thp_vma_allowable_orders(vma, vma->vm_flags, TVA_SMAPS | TVA_ENFORCE_SYSFS, THP_ORDERS_ALL)); if (arch_pkeys_enabled()) seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma)); show_smap_vma_flags(m, vma); return 0; } static int show_smaps_rollup(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; struct mem_size_stats mss = {}; struct mm_struct *mm = priv->mm; struct vm_area_struct *vma; unsigned long vma_start = 0, last_vma_end = 0; int ret = 0; VMA_ITERATOR(vmi, mm, 0); priv->task = get_proc_task(priv->inode); if (!priv->task) return -ESRCH; if (!mm || !mmget_not_zero(mm)) { ret = -ESRCH; goto out_put_task; } ret = mmap_read_lock_killable(mm); if (ret) goto out_put_mm; hold_task_mempolicy(priv); vma = vma_next(&vmi); if (unlikely(!vma)) goto empty_set; vma_start = vma->vm_start; do { smap_gather_stats(vma, &mss, 0); last_vma_end = vma->vm_end; /* * Release mmap_lock temporarily if someone wants to * access it for write request. */ if (mmap_lock_is_contended(mm)) { vma_iter_invalidate(&vmi); mmap_read_unlock(mm); ret = mmap_read_lock_killable(mm); if (ret) { release_task_mempolicy(priv); goto out_put_mm; } /* * After dropping the lock, there are four cases to * consider. See the following example for explanation. * * +------+------+-----------+ * | VMA1 | VMA2 | VMA3 | * +------+------+-----------+ * | | | | * 4k 8k 16k 400k * * Suppose we drop the lock after reading VMA2 due to * contention, then we get: * * last_vma_end = 16k * * 1) VMA2 is freed, but VMA3 exists: * * vma_next(vmi) will return VMA3. * In this case, just continue from VMA3. * * 2) VMA2 still exists: * * vma_next(vmi) will return VMA3. * In this case, just continue from VMA3. * * 3) No more VMAs can be found: * * vma_next(vmi) will return NULL. * No more things to do, just break. * * 4) (last_vma_end - 1) is the middle of a vma (VMA'): * * vma_next(vmi) will return VMA' whose range * contains last_vma_end. * Iterate VMA' from last_vma_end. 
*/ vma = vma_next(&vmi); /* Case 3 above */ if (!vma) break; /* Case 1 and 2 above */ if (vma->vm_start >= last_vma_end) { smap_gather_stats(vma, &mss, 0); last_vma_end = vma->vm_end; continue; } /* Case 4 above */ if (vma->vm_end > last_vma_end) { smap_gather_stats(vma, &mss, last_vma_end); last_vma_end = vma->vm_end; } } } for_each_vma(vmi, vma); empty_set: show_vma_header_prefix(m, vma_start, last_vma_end, 0, 0, 0, 0); seq_pad(m, ' '); seq_puts(m, "[rollup]\n"); __show_smap(m, &mss, true); release_task_mempolicy(priv); mmap_read_unlock(mm); out_put_mm: mmput(mm); out_put_task: put_task_struct(priv->task); priv->task = NULL; return ret; } #undef SEQ_PUT_DEC static const struct seq_operations proc_pid_smaps_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_smap }; static int pid_smaps_open(struct inode *inode, struct file *file) { return do_maps_open(inode, file, &proc_pid_smaps_op); } static int smaps_rollup_open(struct inode *inode, struct file *file) { int ret; struct proc_maps_private *priv; priv = kzalloc(sizeof(*priv), GFP_KERNEL_ACCOUNT); if (!priv) return -ENOMEM; ret = single_open(file, show_smaps_rollup, priv); if (ret) goto out_free; priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); if (IS_ERR_OR_NULL(priv->mm)) { ret = priv->mm ? PTR_ERR(priv->mm) : -ESRCH; single_release(inode, file); goto out_free; } return 0; out_free: kfree(priv); return ret; } static int smaps_rollup_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; struct proc_maps_private *priv = seq->private; if (priv->mm) mmdrop(priv->mm); kfree(priv); return single_release(inode, file); } const struct file_operations proc_pid_smaps_operations = { .open = pid_smaps_open, .read = seq_read, .llseek = seq_lseek, .release = proc_map_release, }; const struct file_operations proc_pid_smaps_rollup_operations = { .open = smaps_rollup_open, .read = seq_read, .llseek = seq_lseek, .release = smaps_rollup_release, }; enum clear_refs_types { CLEAR_REFS_ALL = 1, CLEAR_REFS_ANON, CLEAR_REFS_MAPPED, CLEAR_REFS_SOFT_DIRTY, CLEAR_REFS_MM_HIWATER_RSS, CLEAR_REFS_LAST, }; struct clear_refs_private { enum clear_refs_types type; }; #ifdef CONFIG_MEM_SOFT_DIRTY static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { struct folio *folio; if (!pte_write(pte)) return false; if (!is_cow_mapping(vma->vm_flags)) return false; if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags))) return false; folio = vm_normal_folio(vma, addr, pte); if (!folio) return false; return folio_maybe_dma_pinned(folio); } static inline void clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { /* * The soft-dirty tracker uses #PF-s to catch writes * to pages, so write-protect the pte as well. See the * Documentation/admin-guide/mm/soft-dirty.rst for full description * of how soft-dirty works. 
*/ pte_t ptent = ptep_get(pte); if (pte_present(ptent)) { pte_t old_pte; if (pte_is_pinned(vma, addr, ptent)) return; old_pte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_wrprotect(old_pte); ptent = pte_clear_soft_dirty(ptent); ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); } else if (is_swap_pte(ptent)) { ptent = pte_swp_clear_soft_dirty(ptent); set_pte_at(vma->vm_mm, addr, pte, ptent); } } #else static inline void clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { } #endif #if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE) static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { pmd_t old, pmd = *pmdp; if (pmd_present(pmd)) { /* See comment in change_huge_pmd() */ old = pmdp_invalidate(vma, addr, pmdp); if (pmd_dirty(old)) pmd = pmd_mkdirty(pmd); if (pmd_young(old)) pmd = pmd_mkyoung(pmd); pmd = pmd_wrprotect(pmd); pmd = pmd_clear_soft_dirty(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { pmd = pmd_swp_clear_soft_dirty(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } } #else static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { } #endif static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct clear_refs_private *cp = walk->private; struct vm_area_struct *vma = walk->vma; pte_t *pte, ptent; spinlock_t *ptl; struct folio *folio; ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { if (cp->type == CLEAR_REFS_SOFT_DIRTY) { clear_soft_dirty_pmd(vma, addr, pmd); goto out; } if (!pmd_present(*pmd)) goto out; folio = pmd_folio(*pmd); /* Clear accessed and referenced bits. */ pmdp_test_and_clear_young(vma, addr, pmd); folio_test_clear_young(folio); folio_clear_referenced(folio); out: spin_unlock(ptl); return 0; } pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (!pte) { walk->action = ACTION_AGAIN; return 0; } for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = ptep_get(pte); if (cp->type == CLEAR_REFS_SOFT_DIRTY) { clear_soft_dirty(vma, addr, pte); continue; } if (!pte_present(ptent)) continue; folio = vm_normal_folio(vma, addr, ptent); if (!folio) continue; /* Clear accessed and referenced bits. */ ptep_test_and_clear_young(vma, addr, pte); folio_test_clear_young(folio); folio_clear_referenced(folio); } pte_unmap_unlock(pte - 1, ptl); cond_resched(); return 0; } static int clear_refs_test_walk(unsigned long start, unsigned long end, struct mm_walk *walk) { struct clear_refs_private *cp = walk->private; struct vm_area_struct *vma = walk->vma; if (vma->vm_flags & VM_PFNMAP) return 1; /* * Writing 1 to /proc/pid/clear_refs affects all pages. * Writing 2 to /proc/pid/clear_refs only affects anonymous pages. * Writing 3 to /proc/pid/clear_refs only affects file mapped pages. * Writing 4 to /proc/pid/clear_refs affects all pages. 
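 *
 * For example, echo 4 > /proc/PID/clear_refs clears the soft-dirty bits of
 * all pages so that subsequent writes can be detected again through
 * /proc/PID/pagemap bit 55 (see Documentation/admin-guide/mm/soft-dirty.rst).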
*/ if (cp->type == CLEAR_REFS_ANON && vma->vm_file) return 1; if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file) return 1; return 0; } static const struct mm_walk_ops clear_refs_walk_ops = { .pmd_entry = clear_refs_pte_range, .test_walk = clear_refs_test_walk, .walk_lock = PGWALK_WRLOCK, }; static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; char buffer[PROC_NUMBUF] = {}; struct mm_struct *mm; struct vm_area_struct *vma; enum clear_refs_types type; int itype; int rv; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; rv = kstrtoint(strstrip(buffer), 10, &itype); if (rv < 0) return rv; type = (enum clear_refs_types)itype; if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) return -EINVAL; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; mm = get_task_mm(task); if (mm) { VMA_ITERATOR(vmi, mm, 0); struct mmu_notifier_range range; struct clear_refs_private cp = { .type = type, }; if (mmap_write_lock_killable(mm)) { count = -EINTR; goto out_mm; } if (type == CLEAR_REFS_MM_HIWATER_RSS) { /* * Writing 5 to /proc/pid/clear_refs resets the peak * resident set size to this mm's current rss value. */ reset_mm_hiwater_rss(mm); goto out_unlock; } if (type == CLEAR_REFS_SOFT_DIRTY) { for_each_vma(vmi, vma) { if (!(vma->vm_flags & VM_SOFTDIRTY)) continue; vm_flags_clear(vma, VM_SOFTDIRTY); vma_set_page_prot(vma); } inc_tlb_flush_pending(mm); mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, 0, mm, 0, -1UL); mmu_notifier_invalidate_range_start(&range); } walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp); if (type == CLEAR_REFS_SOFT_DIRTY) { mmu_notifier_invalidate_range_end(&range); flush_tlb_mm(mm); dec_tlb_flush_pending(mm); } out_unlock: mmap_write_unlock(mm); out_mm: mmput(mm); } put_task_struct(task); return count; } const struct file_operations proc_clear_refs_operations = { .write = clear_refs_write, .llseek = noop_llseek, }; typedef struct { u64 pme; } pagemap_entry_t; struct pagemapread { int pos, len; /* units: PM_ENTRY_BYTES, not bytes */ pagemap_entry_t *buffer; bool show_pfn; }; #define PAGEMAP_WALK_SIZE (PMD_SIZE) #define PAGEMAP_WALK_MASK (PMD_MASK) #define PM_ENTRY_BYTES sizeof(pagemap_entry_t) #define PM_PFRAME_BITS 55 #define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0) #define PM_SOFT_DIRTY BIT_ULL(55) #define PM_MMAP_EXCLUSIVE BIT_ULL(56) #define PM_UFFD_WP BIT_ULL(57) #define PM_GUARD_REGION BIT_ULL(58) #define PM_FILE BIT_ULL(61) #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) #define PM_END_OF_BUFFER 1 static inline pagemap_entry_t make_pme(u64 frame, u64 flags) { return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags }; } static int add_to_pagemap(pagemap_entry_t *pme, struct pagemapread *pm) { pm->buffer[pm->pos++] = *pme; if (pm->pos >= pm->len) return PM_END_OF_BUFFER; return 0; } static bool __folio_page_mapped_exclusively(struct folio *folio, struct page *page) { if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) return folio_precise_page_mapcount(folio, page) == 1; return !folio_maybe_mapped_shared(folio); } static int pagemap_pte_hole(unsigned long start, unsigned long end, __always_unused int depth, struct mm_walk *walk) { struct pagemapread *pm = walk->private; unsigned long addr = start; int err = 0; while (addr < end) { struct vm_area_struct *vma = find_vma(walk->mm, addr); pagemap_entry_t pme = make_pme(0, 0); /* End of address space hole, which we mark as non-present. 
*/ unsigned long hole_end; if (vma) hole_end = min(end, vma->vm_start); else hole_end = end; for (; addr < hole_end; addr += PAGE_SIZE) { err = add_to_pagemap(&pme, pm); if (err) goto out; } if (!vma) break; /* Addresses in the VMA. */ if (vma->vm_flags & VM_SOFTDIRTY) pme = make_pme(0, PM_SOFT_DIRTY); for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) { err = add_to_pagemap(&pme, pm); if (err) goto out; } } out: return err; } static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, struct vm_area_struct *vma, unsigned long addr, pte_t pte) { u64 frame = 0, flags = 0; struct page *page = NULL; struct folio *folio; if (pte_present(pte)) { if (pm->show_pfn) frame = pte_pfn(pte); flags |= PM_PRESENT; page = vm_normal_page(vma, addr, pte); if (pte_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; if (pte_uffd_wp(pte)) flags |= PM_UFFD_WP; } else if (is_swap_pte(pte)) { swp_entry_t entry; if (pte_swp_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; if (pte_swp_uffd_wp(pte)) flags |= PM_UFFD_WP; entry = pte_to_swp_entry(pte); if (pm->show_pfn) { pgoff_t offset; /* * For PFN swap offsets, keeping the offset field * to be PFN only to be compatible with old smaps. */ if (is_pfn_swap_entry(entry)) offset = swp_offset_pfn(entry); else offset = swp_offset(entry); frame = swp_type(entry) | (offset << MAX_SWAPFILES_SHIFT); } flags |= PM_SWAP; if (is_pfn_swap_entry(entry)) page = pfn_swap_entry_to_page(entry); if (pte_marker_entry_uffd_wp(entry)) flags |= PM_UFFD_WP; if (is_guard_swp_entry(entry)) flags |= PM_GUARD_REGION; } if (page) { folio = page_folio(page); if (!folio_test_anon(folio)) flags |= PM_FILE; if ((flags & PM_PRESENT) && __folio_page_mapped_exclusively(folio, page)) flags |= PM_MMAP_EXCLUSIVE; } if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; return make_pme(frame, flags); } static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; struct pagemapread *pm = walk->private; spinlock_t *ptl; pte_t *pte, *orig_pte; int err = 0; #ifdef CONFIG_TRANSPARENT_HUGEPAGE ptl = pmd_trans_huge_lock(pmdp, vma); if (ptl) { unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT; u64 flags = 0, frame = 0; pmd_t pmd = *pmdp; struct page *page = NULL; struct folio *folio = NULL; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; if (pmd_present(pmd)) { page = pmd_page(pmd); flags |= PM_PRESENT; if (pmd_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; if (pmd_uffd_wp(pmd)) flags |= PM_UFFD_WP; if (pm->show_pfn) frame = pmd_pfn(pmd) + idx; } #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION else if (is_swap_pmd(pmd)) { swp_entry_t entry = pmd_to_swp_entry(pmd); unsigned long offset; if (pm->show_pfn) { if (is_pfn_swap_entry(entry)) offset = swp_offset_pfn(entry) + idx; else offset = swp_offset(entry) + idx; frame = swp_type(entry) | (offset << MAX_SWAPFILES_SHIFT); } flags |= PM_SWAP; if (pmd_swp_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; if (pmd_swp_uffd_wp(pmd)) flags |= PM_UFFD_WP; VM_BUG_ON(!is_pmd_migration_entry(pmd)); page = pfn_swap_entry_to_page(entry); } #endif if (page) { folio = page_folio(page); if (!folio_test_anon(folio)) flags |= PM_FILE; } for (; addr != end; addr += PAGE_SIZE, idx++) { u64 cur_flags = flags; pagemap_entry_t pme; if (folio && (flags & PM_PRESENT) && __folio_page_mapped_exclusively(folio, page)) cur_flags |= PM_MMAP_EXCLUSIVE; pme = make_pme(frame, cur_flags); err = add_to_pagemap(&pme, pm); if (err) break; if (pm->show_pfn) { if (flags & PM_PRESENT) frame++; else if (flags & PM_SWAP) frame += (1 << 
MAX_SWAPFILES_SHIFT); } } spin_unlock(ptl); return err; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* * We can assume that @vma always points to a valid one and @end never * goes beyond vma->vm_end. */ orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl); if (!pte) { walk->action = ACTION_AGAIN; return err; } for (; addr < end; pte++, addr += PAGE_SIZE) { pagemap_entry_t pme; pme = pte_to_pagemap_entry(pm, vma, addr, ptep_get(pte)); err = add_to_pagemap(&pme, pm); if (err) break; } pte_unmap_unlock(orig_pte, ptl); cond_resched(); return err; } #ifdef CONFIG_HUGETLB_PAGE /* This function walks within one hugetlb entry in the single call */ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct pagemapread *pm = walk->private; struct vm_area_struct *vma = walk->vma; u64 flags = 0, frame = 0; int err = 0; pte_t pte; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; pte = huge_ptep_get(walk->mm, addr, ptep); if (pte_present(pte)) { struct folio *folio = page_folio(pte_page(pte)); if (!folio_test_anon(folio)) flags |= PM_FILE; if (!folio_maybe_mapped_shared(folio) && !hugetlb_pmd_shared(ptep)) flags |= PM_MMAP_EXCLUSIVE; if (huge_pte_uffd_wp(pte)) flags |= PM_UFFD_WP; flags |= PM_PRESENT; if (pm->show_pfn) frame = pte_pfn(pte) + ((addr & ~hmask) >> PAGE_SHIFT); } else if (pte_swp_uffd_wp_any(pte)) { flags |= PM_UFFD_WP; } for (; addr != end; addr += PAGE_SIZE) { pagemap_entry_t pme = make_pme(frame, flags); err = add_to_pagemap(&pme, pm); if (err) return err; if (pm->show_pfn && (flags & PM_PRESENT)) frame++; } cond_resched(); return err; } #else #define pagemap_hugetlb_range NULL #endif /* HUGETLB_PAGE */ static const struct mm_walk_ops pagemap_ops = { .pmd_entry = pagemap_pmd_range, .pte_hole = pagemap_pte_hole, .hugetlb_entry = pagemap_hugetlb_range, .walk_lock = PGWALK_RDLOCK, }; /* * /proc/pid/pagemap - an array mapping virtual pages to pfns * * For each page in the address space, this file contains one 64-bit entry * consisting of the following: * * Bits 0-54 page frame number (PFN) if present * Bits 0-4 swap type if swapped * Bits 5-54 swap offset if swapped * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst) * Bit 56 page exclusively mapped * Bit 57 pte is uffd-wp write-protected * Bit 58 pte is a guard region * Bits 59-60 zero * Bit 61 page is file-page or shared-anon * Bit 62 page swapped * Bit 63 page present * * If the page is not present but in swap, then the PFN contains an * encoding of the swap file number and the page's offset into the * swap. Unmapped pages return a null PFN. This allows determining * precisely which pages are mapped (or in swap) and comparing mapped * pages between processes. * * Efficient users of this interface will use /proc/pid/maps to * determine which areas of memory are actually mapped and llseek to * skip over unmapped regions. 
*/ static ssize_t pagemap_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct mm_struct *mm = file->private_data; struct pagemapread pm; unsigned long src; unsigned long svpfn; unsigned long start_vaddr; unsigned long end_vaddr; int ret = 0, copied = 0; if (!mm || !mmget_not_zero(mm)) goto out; ret = -EINVAL; /* file position must be aligned */ if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) goto out_mm; ret = 0; if (!count) goto out_mm; /* do not disclose physical addresses: attack vector */ pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN); pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL); ret = -ENOMEM; if (!pm.buffer) goto out_mm; src = *ppos; svpfn = src / PM_ENTRY_BYTES; end_vaddr = mm->task_size; /* watch out for wraparound */ start_vaddr = end_vaddr; if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) { unsigned long end; ret = mmap_read_lock_killable(mm); if (ret) goto out_free; start_vaddr = untagged_addr_remote(mm, svpfn << PAGE_SHIFT); mmap_read_unlock(mm); end = start_vaddr + ((count / PM_ENTRY_BYTES) << PAGE_SHIFT); if (end >= start_vaddr && end < mm->task_size) end_vaddr = end; } /* Ensure the address is inside the task */ if (start_vaddr > mm->task_size) start_vaddr = end_vaddr; ret = 0; while (count && (start_vaddr < end_vaddr)) { int len; unsigned long end; pm.pos = 0; end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK; /* overflow ? */ if (end < start_vaddr || end > end_vaddr) end = end_vaddr; ret = mmap_read_lock_killable(mm); if (ret) goto out_free; ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm); mmap_read_unlock(mm); start_vaddr = end; len = min(count, PM_ENTRY_BYTES * pm.pos); if (copy_to_user(buf, pm.buffer, len)) { ret = -EFAULT; goto out_free; } copied += len; buf += len; count -= len; } *ppos += copied; if (!ret || ret == PM_END_OF_BUFFER) ret = copied; out_free: kfree(pm.buffer); out_mm: mmput(mm); out: return ret; } static int pagemap_open(struct inode *inode, struct file *file) { struct mm_struct *mm; mm = proc_mem_open(inode, PTRACE_MODE_READ); if (IS_ERR_OR_NULL(mm)) return mm ? 
PTR_ERR(mm) : -ESRCH; file->private_data = mm; return 0; } static int pagemap_release(struct inode *inode, struct file *file) { struct mm_struct *mm = file->private_data; if (mm) mmdrop(mm); return 0; } #define PM_SCAN_CATEGORIES (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \ PAGE_IS_FILE | PAGE_IS_PRESENT | \ PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \ PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY | \ PAGE_IS_GUARD) #define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC) struct pagemap_scan_private { struct pm_scan_arg arg; unsigned long masks_of_interest, cur_vma_category; struct page_region *vec_buf; unsigned long vec_buf_len, vec_buf_index, found_pages; struct page_region __user *vec_out; }; static unsigned long pagemap_page_category(struct pagemap_scan_private *p, struct vm_area_struct *vma, unsigned long addr, pte_t pte) { unsigned long categories = 0; if (pte_present(pte)) { struct page *page; categories |= PAGE_IS_PRESENT; if (!pte_uffd_wp(pte)) categories |= PAGE_IS_WRITTEN; if (p->masks_of_interest & PAGE_IS_FILE) { page = vm_normal_page(vma, addr, pte); if (page && !PageAnon(page)) categories |= PAGE_IS_FILE; } if (is_zero_pfn(pte_pfn(pte))) categories |= PAGE_IS_PFNZERO; if (pte_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; } else if (is_swap_pte(pte)) { swp_entry_t swp; categories |= PAGE_IS_SWAPPED; if (!pte_swp_uffd_wp_any(pte)) categories |= PAGE_IS_WRITTEN; swp = pte_to_swp_entry(pte); if (is_guard_swp_entry(swp)) categories |= PAGE_IS_GUARD; else if ((p->masks_of_interest & PAGE_IS_FILE) && is_pfn_swap_entry(swp) && !folio_test_anon(pfn_swap_entry_folio(swp))) categories |= PAGE_IS_FILE; if (pte_swp_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; } return categories; } static void make_uffd_wp_pte(struct vm_area_struct *vma, unsigned long addr, pte_t *pte, pte_t ptent) { if (pte_present(ptent)) { pte_t old_pte; old_pte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_mkuffd_wp(old_pte); ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); } else if (is_swap_pte(ptent)) { ptent = pte_swp_mkuffd_wp(ptent); set_pte_at(vma->vm_mm, addr, pte, ptent); } else { set_pte_at(vma->vm_mm, addr, pte, make_pte_marker(PTE_MARKER_UFFD_WP)); } } #ifdef CONFIG_TRANSPARENT_HUGEPAGE static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd) { unsigned long categories = PAGE_IS_HUGE; if (pmd_present(pmd)) { struct page *page; categories |= PAGE_IS_PRESENT; if (!pmd_uffd_wp(pmd)) categories |= PAGE_IS_WRITTEN; if (p->masks_of_interest & PAGE_IS_FILE) { page = vm_normal_page_pmd(vma, addr, pmd); if (page && !PageAnon(page)) categories |= PAGE_IS_FILE; } if (is_huge_zero_pmd(pmd)) categories |= PAGE_IS_PFNZERO; if (pmd_soft_dirty(pmd)) categories |= PAGE_IS_SOFT_DIRTY; } else if (is_swap_pmd(pmd)) { swp_entry_t swp; categories |= PAGE_IS_SWAPPED; if (!pmd_swp_uffd_wp(pmd)) categories |= PAGE_IS_WRITTEN; if (pmd_swp_soft_dirty(pmd)) categories |= PAGE_IS_SOFT_DIRTY; if (p->masks_of_interest & PAGE_IS_FILE) { swp = pmd_to_swp_entry(pmd); if (is_pfn_swap_entry(swp) && !folio_test_anon(pfn_swap_entry_folio(swp))) categories |= PAGE_IS_FILE; } } return categories; } static void make_uffd_wp_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { pmd_t old, pmd = *pmdp; if (pmd_present(pmd)) { old = pmdp_invalidate_ad(vma, addr, pmdp); pmd = pmd_mkuffd_wp(old); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { pmd = pmd_swp_mkuffd_wp(pmd); set_pmd_at(vma->vm_mm, 
addr, pmdp, pmd); } } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_HUGETLB_PAGE static unsigned long pagemap_hugetlb_category(pte_t pte) { unsigned long categories = PAGE_IS_HUGE; /* * According to pagemap_hugetlb_range(), file-backed HugeTLB * page cannot be swapped. So PAGE_IS_FILE is not checked for * swapped pages. */ if (pte_present(pte)) { categories |= PAGE_IS_PRESENT; if (!huge_pte_uffd_wp(pte)) categories |= PAGE_IS_WRITTEN; if (!PageAnon(pte_page(pte))) categories |= PAGE_IS_FILE; if (is_zero_pfn(pte_pfn(pte))) categories |= PAGE_IS_PFNZERO; if (pte_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; } else if (is_swap_pte(pte)) { categories |= PAGE_IS_SWAPPED; if (!pte_swp_uffd_wp_any(pte)) categories |= PAGE_IS_WRITTEN; if (pte_swp_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; } return categories; } static void make_uffd_wp_huge_pte(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t ptent) { unsigned long psize; if (is_hugetlb_entry_hwpoisoned(ptent) || is_pte_marker(ptent)) return; psize = huge_page_size(hstate_vma(vma)); if (is_hugetlb_entry_migration(ptent)) set_huge_pte_at(vma->vm_mm, addr, ptep, pte_swp_mkuffd_wp(ptent), psize); else if (!huge_pte_none(ptent)) huge_ptep_modify_prot_commit(vma, addr, ptep, ptent, huge_pte_mkuffd_wp(ptent)); else set_huge_pte_at(vma->vm_mm, addr, ptep, make_pte_marker(PTE_MARKER_UFFD_WP), psize); } #endif /* CONFIG_HUGETLB_PAGE */ #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) static void pagemap_scan_backout_range(struct pagemap_scan_private *p, unsigned long addr, unsigned long end) { struct page_region *cur_buf = &p->vec_buf[p->vec_buf_index]; if (cur_buf->start != addr) cur_buf->end = addr; else cur_buf->start = cur_buf->end = 0; p->found_pages -= (end - addr) / PAGE_SIZE; } #endif static bool pagemap_scan_is_interesting_page(unsigned long categories, const struct pagemap_scan_private *p) { categories ^= p->arg.category_inverted; if ((categories & p->arg.category_mask) != p->arg.category_mask) return false; if (p->arg.category_anyof_mask && !(categories & p->arg.category_anyof_mask)) return false; return true; } static bool pagemap_scan_is_interesting_vma(unsigned long categories, const struct pagemap_scan_private *p) { unsigned long required = p->arg.category_mask & PAGE_IS_WPALLOWED; categories ^= p->arg.category_inverted; if ((categories & required) != required) return false; return true; } static int pagemap_scan_test_walk(unsigned long start, unsigned long end, struct mm_walk *walk) { struct pagemap_scan_private *p = walk->private; struct vm_area_struct *vma = walk->vma; unsigned long vma_category = 0; bool wp_allowed = userfaultfd_wp_async(vma) && userfaultfd_wp_use_markers(vma); if (!wp_allowed) { /* User requested explicit failure over wp-async capability */ if (p->arg.flags & PM_SCAN_CHECK_WPASYNC) return -EPERM; /* * User requires wr-protect, and allows silently skipping * unsupported vmas. */ if (p->arg.flags & PM_SCAN_WP_MATCHING) return 1; /* * Then the request doesn't involve wr-protects at all, * fall through to the rest checks, and allow vma walk. 
*/ } if (vma->vm_flags & VM_PFNMAP) return 1; if (wp_allowed) vma_category |= PAGE_IS_WPALLOWED; if (vma->vm_flags & VM_SOFTDIRTY) vma_category |= PAGE_IS_SOFT_DIRTY; if (!pagemap_scan_is_interesting_vma(vma_category, p)) return 1; p->cur_vma_category = vma_category; return 0; } static bool pagemap_scan_push_range(unsigned long categories, struct pagemap_scan_private *p, unsigned long addr, unsigned long end) { struct page_region *cur_buf = &p->vec_buf[p->vec_buf_index]; /* * When there is no output buffer provided at all, the sentinel values * won't match here. There is no other way for `cur_buf->end` to be * non-zero other than it being non-empty. */ if (addr == cur_buf->end && categories == cur_buf->categories) { cur_buf->end = end; return true; } if (cur_buf->end) { if (p->vec_buf_index >= p->vec_buf_len - 1) return false; cur_buf = &p->vec_buf[++p->vec_buf_index]; } cur_buf->start = addr; cur_buf->end = end; cur_buf->categories = categories; return true; } static int pagemap_scan_output(unsigned long categories, struct pagemap_scan_private *p, unsigned long addr, unsigned long *end) { unsigned long n_pages, total_pages; int ret = 0; if (!p->vec_buf) return 0; categories &= p->arg.return_mask; n_pages = (*end - addr) / PAGE_SIZE; if (check_add_overflow(p->found_pages, n_pages, &total_pages) || total_pages > p->arg.max_pages) { size_t n_too_much = total_pages - p->arg.max_pages; *end -= n_too_much * PAGE_SIZE; n_pages -= n_too_much; ret = -ENOSPC; } if (!pagemap_scan_push_range(categories, p, addr, *end)) { *end = addr; n_pages = 0; ret = -ENOSPC; } p->found_pages += n_pages; if (ret) p->arg.walk_end = *end; return ret; } static int pagemap_scan_thp_entry(pmd_t *pmd, unsigned long start, unsigned long end, struct mm_walk *walk) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct pagemap_scan_private *p = walk->private; struct vm_area_struct *vma = walk->vma; unsigned long categories; spinlock_t *ptl; int ret = 0; ptl = pmd_trans_huge_lock(pmd, vma); if (!ptl) return -ENOENT; categories = p->cur_vma_category | pagemap_thp_category(p, vma, start, *pmd); if (!pagemap_scan_is_interesting_page(categories, p)) goto out_unlock; ret = pagemap_scan_output(categories, p, start, &end); if (start == end) goto out_unlock; if (~p->arg.flags & PM_SCAN_WP_MATCHING) goto out_unlock; if (~categories & PAGE_IS_WRITTEN) goto out_unlock; /* * Break huge page into small pages if the WP operation * needs to be performed on a portion of the huge page. 
*/ if (end != start + HPAGE_SIZE) { spin_unlock(ptl); split_huge_pmd(vma, pmd, start); pagemap_scan_backout_range(p, start, end); /* Report as if there was no THP */ return -ENOENT; } make_uffd_wp_pmd(vma, start, pmd); flush_tlb_range(vma, start, end); out_unlock: spin_unlock(ptl); return ret; #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ return -ENOENT; #endif } static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, unsigned long end, struct mm_walk *walk) { struct pagemap_scan_private *p = walk->private; struct vm_area_struct *vma = walk->vma; unsigned long addr, flush_end = 0; pte_t *pte, *start_pte; spinlock_t *ptl; int ret; ret = pagemap_scan_thp_entry(pmd, start, end, walk); if (ret != -ENOENT) return ret; ret = 0; start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl); if (!pte) { walk->action = ACTION_AGAIN; return 0; } arch_enter_lazy_mmu_mode(); if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) { /* Fast path for performing exclusive WP */ for (addr = start; addr != end; pte++, addr += PAGE_SIZE) { pte_t ptent = ptep_get(pte); if ((pte_present(ptent) && pte_uffd_wp(ptent)) || pte_swp_uffd_wp_any(ptent)) continue; make_uffd_wp_pte(vma, addr, pte, ptent); if (!flush_end) start = addr; flush_end = addr + PAGE_SIZE; } goto flush_and_return; } if (!p->arg.category_anyof_mask && !p->arg.category_inverted && p->arg.category_mask == PAGE_IS_WRITTEN && p->arg.return_mask == PAGE_IS_WRITTEN) { for (addr = start; addr < end; pte++, addr += PAGE_SIZE) { unsigned long next = addr + PAGE_SIZE; pte_t ptent = ptep_get(pte); if ((pte_present(ptent) && pte_uffd_wp(ptent)) || pte_swp_uffd_wp_any(ptent)) continue; ret = pagemap_scan_output(p->cur_vma_category | PAGE_IS_WRITTEN, p, addr, &next); if (next == addr) break; if (~p->arg.flags & PM_SCAN_WP_MATCHING) continue; make_uffd_wp_pte(vma, addr, pte, ptent); if (!flush_end) start = addr; flush_end = next; } goto flush_and_return; } for (addr = start; addr != end; pte++, addr += PAGE_SIZE) { pte_t ptent = ptep_get(pte); unsigned long categories = p->cur_vma_category | pagemap_page_category(p, vma, addr, ptent); unsigned long next = addr + PAGE_SIZE; if (!pagemap_scan_is_interesting_page(categories, p)) continue; ret = pagemap_scan_output(categories, p, addr, &next); if (next == addr) break; if (~p->arg.flags & PM_SCAN_WP_MATCHING) continue; if (~categories & PAGE_IS_WRITTEN) continue; make_uffd_wp_pte(vma, addr, pte, ptent); if (!flush_end) start = addr; flush_end = next; } flush_and_return: if (flush_end) flush_tlb_range(vma, start, addr); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(start_pte, ptl); cond_resched(); return ret; } #ifdef CONFIG_HUGETLB_PAGE static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask, unsigned long start, unsigned long end, struct mm_walk *walk) { struct pagemap_scan_private *p = walk->private; struct vm_area_struct *vma = walk->vma; unsigned long categories; spinlock_t *ptl; int ret = 0; pte_t pte; if (~p->arg.flags & PM_SCAN_WP_MATCHING) { /* Go the short route when not write-protecting pages. 
*/ pte = huge_ptep_get(walk->mm, start, ptep); categories = p->cur_vma_category | pagemap_hugetlb_category(pte); if (!pagemap_scan_is_interesting_page(categories, p)) return 0; return pagemap_scan_output(categories, p, start, &end); } i_mmap_lock_write(vma->vm_file->f_mapping); ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, ptep); pte = huge_ptep_get(walk->mm, start, ptep); categories = p->cur_vma_category | pagemap_hugetlb_category(pte); if (!pagemap_scan_is_interesting_page(categories, p)) goto out_unlock; ret = pagemap_scan_output(categories, p, start, &end); if (start == end) goto out_unlock; if (~categories & PAGE_IS_WRITTEN) goto out_unlock; if (end != start + HPAGE_SIZE) { /* Partial HugeTLB page WP isn't possible. */ pagemap_scan_backout_range(p, start, end); p->arg.walk_end = start; ret = 0; goto out_unlock; } make_uffd_wp_huge_pte(vma, start, ptep, pte); flush_hugetlb_tlb_range(vma, start, end); out_unlock: spin_unlock(ptl); i_mmap_unlock_write(vma->vm_file->f_mapping); return ret; } #else #define pagemap_scan_hugetlb_entry NULL #endif static int pagemap_scan_pte_hole(unsigned long addr, unsigned long end, int depth, struct mm_walk *walk) { struct pagemap_scan_private *p = walk->private; struct vm_area_struct *vma = walk->vma; int ret, err; if (!vma || !pagemap_scan_is_interesting_page(p->cur_vma_category, p)) return 0; ret = pagemap_scan_output(p->cur_vma_category, p, addr, &end); if (addr == end) return ret; if (~p->arg.flags & PM_SCAN_WP_MATCHING) return ret; err = uffd_wp_range(vma, addr, end - addr, true); if (err < 0) ret = err; return ret; } static const struct mm_walk_ops pagemap_scan_ops = { .test_walk = pagemap_scan_test_walk, .pmd_entry = pagemap_scan_pmd_entry, .pte_hole = pagemap_scan_pte_hole, .hugetlb_entry = pagemap_scan_hugetlb_entry, }; static int pagemap_scan_get_args(struct pm_scan_arg *arg, unsigned long uarg) { if (copy_from_user(arg, (void __user *)uarg, sizeof(*arg))) return -EFAULT; if (arg->size != sizeof(struct pm_scan_arg)) return -EINVAL; /* Validate requested features */ if (arg->flags & ~PM_SCAN_FLAGS) return -EINVAL; if ((arg->category_inverted | arg->category_mask | arg->category_anyof_mask | arg->return_mask) & ~PM_SCAN_CATEGORIES) return -EINVAL; arg->start = untagged_addr((unsigned long)arg->start); arg->end = untagged_addr((unsigned long)arg->end); arg->vec = untagged_addr((unsigned long)arg->vec); /* Validate memory pointers */ if (!IS_ALIGNED(arg->start, PAGE_SIZE)) return -EINVAL; if (!access_ok((void __user *)(long)arg->start, arg->end - arg->start)) return -EFAULT; if (!arg->vec && arg->vec_len) return -EINVAL; if (UINT_MAX == SIZE_MAX && arg->vec_len > SIZE_MAX) return -EINVAL; if (arg->vec && !access_ok((void __user *)(long)arg->vec, size_mul(arg->vec_len, sizeof(struct page_region)))) return -EFAULT; /* Fixup default values */ arg->end = ALIGN(arg->end, PAGE_SIZE); arg->walk_end = 0; if (!arg->max_pages) arg->max_pages = ULONG_MAX; return 0; } static int pagemap_scan_writeback_args(struct pm_scan_arg *arg, unsigned long uargl) { struct pm_scan_arg __user *uarg = (void __user *)uargl; if (copy_to_user(&uarg->walk_end, &arg->walk_end, sizeof(arg->walk_end))) return -EFAULT; return 0; } static int pagemap_scan_init_bounce_buffer(struct pagemap_scan_private *p) { if (!p->arg.vec_len) return 0; p->vec_buf_len = min_t(size_t, PAGEMAP_WALK_SIZE >> PAGE_SHIFT, p->arg.vec_len); p->vec_buf = kmalloc_array(p->vec_buf_len, sizeof(*p->vec_buf), GFP_KERNEL); if (!p->vec_buf) return -ENOMEM; p->vec_buf->start = p->vec_buf->end = 0; p->vec_out = 
(struct page_region __user *)(long)p->arg.vec; return 0; } static long pagemap_scan_flush_buffer(struct pagemap_scan_private *p) { const struct page_region *buf = p->vec_buf; long n = p->vec_buf_index; if (!p->vec_buf) return 0; if (buf[n].end != buf[n].start) n++; if (!n) return 0; if (copy_to_user(p->vec_out, buf, n * sizeof(*buf))) return -EFAULT; p->arg.vec_len -= n; p->vec_out += n; p->vec_buf_index = 0; p->vec_buf_len = min_t(size_t, p->vec_buf_len, p->arg.vec_len); p->vec_buf->start = p->vec_buf->end = 0; return n; } static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg) { struct pagemap_scan_private p = {0}; unsigned long walk_start; size_t n_ranges_out = 0; int ret; ret = pagemap_scan_get_args(&p.arg, uarg); if (ret) return ret; p.masks_of_interest = p.arg.category_mask | p.arg.category_anyof_mask | p.arg.return_mask; ret = pagemap_scan_init_bounce_buffer(&p); if (ret) return ret; for (walk_start = p.arg.start; walk_start < p.arg.end; walk_start = p.arg.walk_end) { struct mmu_notifier_range range; long n_out; if (fatal_signal_pending(current)) { ret = -EINTR; break; } ret = mmap_read_lock_killable(mm); if (ret) break; /* Protection change for the range is going to happen. */ if (p.arg.flags & PM_SCAN_WP_MATCHING) { mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_VMA, 0, mm, walk_start, p.arg.end); mmu_notifier_invalidate_range_start(&range); } ret = walk_page_range(mm, walk_start, p.arg.end, &pagemap_scan_ops, &p); if (p.arg.flags & PM_SCAN_WP_MATCHING) mmu_notifier_invalidate_range_end(&range); mmap_read_unlock(mm); n_out = pagemap_scan_flush_buffer(&p); if (n_out < 0) ret = n_out; else n_ranges_out += n_out; if (ret != -ENOSPC) break; if (p.arg.vec_len == 0 || p.found_pages == p.arg.max_pages) break; } /* ENOSPC signifies early stop (buffer full) from the walk. 
*/ if (!ret || ret == -ENOSPC) ret = n_ranges_out; /* The walk_end isn't set when ret is zero */ if (!p.arg.walk_end) p.arg.walk_end = p.arg.end; if (pagemap_scan_writeback_args(&p.arg, uarg)) ret = -EFAULT; kfree(p.vec_buf); return ret; } static long do_pagemap_cmd(struct file *file, unsigned int cmd, unsigned long arg) { struct mm_struct *mm = file->private_data; switch (cmd) { case PAGEMAP_SCAN: return do_pagemap_scan(mm, arg); default: return -EINVAL; } } const struct file_operations proc_pagemap_operations = { .llseek = mem_lseek, /* borrow this */ .read = pagemap_read, .open = pagemap_open, .release = pagemap_release, .unlocked_ioctl = do_pagemap_cmd, .compat_ioctl = do_pagemap_cmd, }; #endif /* CONFIG_PROC_PAGE_MONITOR */ #ifdef CONFIG_NUMA struct numa_maps { unsigned long pages; unsigned long anon; unsigned long active; unsigned long writeback; unsigned long mapcount_max; unsigned long dirty; unsigned long swapcache; unsigned long node[MAX_NUMNODES]; }; struct numa_maps_private { struct proc_maps_private proc_maps; struct numa_maps md; }; static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, unsigned long nr_pages) { struct folio *folio = page_folio(page); int count; if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) count = folio_precise_page_mapcount(folio, page); else count = folio_average_page_mapcount(folio); md->pages += nr_pages; if (pte_dirty || folio_test_dirty(folio)) md->dirty += nr_pages; if (folio_test_swapcache(folio)) md->swapcache += nr_pages; if (folio_test_active(folio) || folio_test_unevictable(folio)) md->active += nr_pages; if (folio_test_writeback(folio)) md->writeback += nr_pages; if (folio_test_anon(folio)) md->anon += nr_pages; if (count > md->mapcount_max) md->mapcount_max = count; md->node[folio_nid(folio)] += nr_pages; } static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, unsigned long addr) { struct page *page; int nid; if (!pte_present(pte)) return NULL; page = vm_normal_page(vma, addr, pte); if (!page || is_zone_device_page(page)) return NULL; if (PageReserved(page)) return NULL; nid = page_to_nid(page); if (!node_isset(nid, node_states[N_MEMORY])) return NULL; return page; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE static struct page *can_gather_numa_stats_pmd(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr) { struct page *page; int nid; if (!pmd_present(pmd)) return NULL; page = vm_normal_page_pmd(vma, addr, pmd); if (!page) return NULL; if (PageReserved(page)) return NULL; nid = page_to_nid(page); if (!node_isset(nid, node_states[N_MEMORY])) return NULL; return page; } #endif static int gather_pte_stats(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct numa_maps *md = walk->private; struct vm_area_struct *vma = walk->vma; spinlock_t *ptl; pte_t *orig_pte; pte_t *pte; #ifdef CONFIG_TRANSPARENT_HUGEPAGE ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { struct page *page; page = can_gather_numa_stats_pmd(*pmd, vma, addr); if (page) gather_stats(page, md, pmd_dirty(*pmd), HPAGE_PMD_SIZE/PAGE_SIZE); spin_unlock(ptl); return 0; } #endif orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); if (!pte) { walk->action = ACTION_AGAIN; return 0; } do { pte_t ptent = ptep_get(pte); struct page *page = can_gather_numa_stats(ptent, vma, addr); if (!page) continue; gather_stats(page, md, pte_dirty(ptent), 1); } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(orig_pte, ptl); cond_resched(); return 0; } #ifdef CONFIG_HUGETLB_PAGE static int 
gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { pte_t huge_pte = huge_ptep_get(walk->mm, addr, pte); struct numa_maps *md; struct page *page; if (!pte_present(huge_pte)) return 0; page = pte_page(huge_pte); md = walk->private; gather_stats(page, md, pte_dirty(huge_pte), 1); return 0; } #else static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { return 0; } #endif static const struct mm_walk_ops show_numa_ops = { .hugetlb_entry = gather_hugetlb_stats, .pmd_entry = gather_pte_stats, .walk_lock = PGWALK_RDLOCK, }; /* * Display pages allocated per node and memory policy via /proc. */ static int show_numa_map(struct seq_file *m, void *v) { struct numa_maps_private *numa_priv = m->private; struct proc_maps_private *proc_priv = &numa_priv->proc_maps; struct vm_area_struct *vma = v; struct numa_maps *md = &numa_priv->md; struct file *file = vma->vm_file; struct mm_struct *mm = vma->vm_mm; char buffer[64]; struct mempolicy *pol; pgoff_t ilx; int nid; if (!mm) return 0; /* Ensure we start with an empty set of numa_maps statistics. */ memset(md, 0, sizeof(*md)); pol = __get_vma_policy(vma, vma->vm_start, &ilx); if (pol) { mpol_to_str(buffer, sizeof(buffer), pol); mpol_cond_put(pol); } else { mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy); } seq_printf(m, "%08lx %s", vma->vm_start, buffer); if (file) { seq_puts(m, " file="); seq_path(m, file_user_path(file), "\n\t= "); } else if (vma_is_initial_heap(vma)) { seq_puts(m, " heap"); } else if (vma_is_initial_stack(vma)) { seq_puts(m, " stack"); } if (is_vm_hugetlb_page(vma)) seq_puts(m, " huge"); /* mmap_lock is held by m_start */ walk_page_vma(vma, &show_numa_ops, md); if (!md->pages) goto out; if (md->anon) seq_printf(m, " anon=%lu", md->anon); if (md->dirty) seq_printf(m, " dirty=%lu", md->dirty); if (md->pages != md->anon && md->pages != md->dirty) seq_printf(m, " mapped=%lu", md->pages); if (md->mapcount_max > 1) seq_printf(m, " mapmax=%lu", md->mapcount_max); if (md->swapcache) seq_printf(m, " swapcache=%lu", md->swapcache); if (md->active < md->pages && !is_vm_hugetlb_page(vma)) seq_printf(m, " active=%lu", md->active); if (md->writeback) seq_printf(m, " writeback=%lu", md->writeback); for_each_node_state(nid, N_MEMORY) if (md->node[nid]) seq_printf(m, " N%d=%lu", nid, md->node[nid]); seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10); out: seq_putc(m, '\n'); return 0; } static const struct seq_operations proc_pid_numa_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_numa_map, }; static int pid_numa_maps_open(struct inode *inode, struct file *file) { return proc_maps_open(inode, file, &proc_pid_numa_maps_op, sizeof(struct numa_maps_private)); } const struct file_operations proc_pid_numa_maps_operations = { .open = pid_numa_maps_open, .read = seq_read, .llseek = seq_lseek, .release = proc_map_release, }; #endif /* CONFIG_NUMA */
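The /proc/pid/pagemap layout documented above can be exercised directly from userspace. The following is a minimal sketch, not taken from the kernel tree: read_pagemap_entry() and the choice of probing a stack variable are illustrative only, and the PFN field is reported as zero without CAP_SYS_ADMIN.

/*
 * Userspace sketch: decode one /proc/self/pagemap entry for a virtual
 * address.  Bit positions follow the table above: 63 present, 62 swapped,
 * 61 file/shared-anon, 56 exclusively mapped, 55 soft-dirty, bits 0-54 PFN.
 */
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static int read_pagemap_entry(uintptr_t vaddr, uint64_t *pme)
{
	long page_size = sysconf(_SC_PAGESIZE);
	off_t offset = (off_t)(vaddr / page_size) * sizeof(*pme);
	int fd = open("/proc/self/pagemap", O_RDONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = pread(fd, pme, sizeof(*pme), offset);
	close(fd);
	return n == (ssize_t)sizeof(*pme) ? 0 : -1;
}

int main(void)
{
	int probe = 1;		/* a stack page that is certainly mapped */
	uint64_t pme;

	if (read_pagemap_entry((uintptr_t)&probe, &pme))
		return 1;

	printf("present=%d swapped=%d file=%d exclusive=%d soft-dirty=%d pfn=0x%" PRIx64 "\n",
	       !!(pme & (1ULL << 63)), !!(pme & (1ULL << 62)),
	       !!(pme & (1ULL << 61)), !!(pme & (1ULL << 56)),
	       !!(pme & (1ULL << 55)), pme & ((1ULL << 55) - 1));
	return 0;
}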
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MEMREMAP_H_ #define _LINUX_MEMREMAP_H_ #include <linux/mmzone.h> #include <linux/range.h> #include <linux/ioport.h> #include <linux/percpu-refcount.h> struct resource; struct device; /** * struct vmem_altmap - pre-allocated storage for vmemmap_populate * @base_pfn: base of the entire dev_pagemap mapping * @reserve: pages mapped, but reserved for driver use (relative to @base) * @free: free pages set aside in the mapping for memmap storage * @align: pages reserved to meet allocation alignments * @alloc: track pages consumed, private to vmemmap_populate() */ struct vmem_altmap { unsigned long base_pfn; const unsigned long end_pfn; const unsigned long reserve; unsigned long free; unsigned long align; unsigned long alloc; bool inaccessible; }; /* * Specialize ZONE_DEVICE memory into multiple types, each with a different * usage. * * MEMORY_DEVICE_PRIVATE: * Device memory that is not directly addressable by the CPU: CPU can neither * read nor write private memory. In this case, we do still have struct pages * backing the device memory. Doing so simplifies the implementation, but it is * important to remember that there are certain points at which the struct page * must be treated as an opaque object, rather than a "normal" struct page. * * A more complete discussion of unaddressable memory may be found in * include/linux/hmm.h and Documentation/mm/hmm.rst. * * MEMORY_DEVICE_COHERENT: * Device memory that is cache coherent from device and CPU point of view. This * is used on platforms that have an advanced system bus (like CAPI or CXL). A * driver can hotplug the device memory using ZONE_DEVICE and with that memory * type. Any page of a process can be migrated to such memory. However no one * should be allowed to pin such memory so that it can always be evicted. * * MEMORY_DEVICE_FS_DAX: * Host memory that has similar access semantics as System RAM i.e. DMA * coherent and supports page pinning. In support of coordinating page * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a * wakeup event whenever a page is unpinned and becomes idle. This * wakeup is used to coordinate physical address space management (ex: * fs truncate/hole punch) vs pinned pages (ex: device dma). * * MEMORY_DEVICE_GENERIC: * Host memory that has similar access semantics as System RAM i.e. DMA * coherent and supports page pinning. This is for example used by DAX devices * that expose memory using a character device. * * MEMORY_DEVICE_PCI_P2PDMA: * Device memory residing in a PCI BAR intended for use with Peer-to-Peer * transactions. 
*/ enum memory_type { /* 0 is reserved to catch uninitialized type fields */ MEMORY_DEVICE_PRIVATE = 1, MEMORY_DEVICE_COHERENT, MEMORY_DEVICE_FS_DAX, MEMORY_DEVICE_GENERIC, MEMORY_DEVICE_PCI_P2PDMA, }; struct dev_pagemap_ops { /* * Called once the page refcount reaches 0. The reference count will be * reset to one by the core code after the method is called to prepare * for handing out the page again. */ void (*page_free)(struct page *page); /* * Used for private (un-addressable) device memory only. Must migrate * the page back to a CPU accessible page. */ vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf); /* * Handle the memory failure happens on a range of pfns. Notify the * processes who are using these pfns, and try to recover the data on * them if necessary. The mf_flags is finally passed to the recover * function through the whole notify routine. * * When this is not implemented, or it returns -EOPNOTSUPP, the caller * will fall back to a common handler called mf_generic_kill_procs(). */ int (*memory_failure)(struct dev_pagemap *pgmap, unsigned long pfn, unsigned long nr_pages, int mf_flags); }; #define PGMAP_ALTMAP_VALID (1 << 0) /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings * @altmap: pre-allocated/reserved memory for vmemmap allocations * @ref: reference count that pins the devm_memremap_pages() mapping * @done: completion for @ref * @type: memory type: see MEMORY_* above in memremap.h * @flags: PGMAP_* flags to specify defailed behavior * @vmemmap_shift: structural definition of how the vmemmap page metadata * is populated, specifically the metadata page order. * A zero value (default) uses base pages as the vmemmap metadata * representation. A bigger value will set up compound struct pages * of the requested order value. * @ops: method table * @owner: an opaque pointer identifying the entity that manages this * instance. Used by various helpers to make sure that no * foreign ZONE_DEVICE memory is accessed. 
* @nr_range: number of ranges to be mapped * @range: range to be mapped when nr_range == 1 * @ranges: array of ranges to be mapped when nr_range > 1 */ struct dev_pagemap { struct vmem_altmap altmap; struct percpu_ref ref; struct completion done; enum memory_type type; unsigned int flags; unsigned long vmemmap_shift; const struct dev_pagemap_ops *ops; void *owner; int nr_range; union { struct range range; DECLARE_FLEX_ARRAY(struct range, ranges); }; }; static inline bool pgmap_has_memory_failure(struct dev_pagemap *pgmap) { return pgmap->ops && pgmap->ops->memory_failure; } static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) { if (pgmap->flags & PGMAP_ALTMAP_VALID) return &pgmap->altmap; return NULL; } static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap) { return 1 << pgmap->vmemmap_shift; } static inline bool is_device_private_page(const struct page *page) { return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && is_zone_device_page(page) && page_pgmap(page)->type == MEMORY_DEVICE_PRIVATE; } static inline bool folio_is_device_private(const struct folio *folio) { return is_device_private_page(&folio->page); } static inline bool is_pci_p2pdma_page(const struct page *page) { return IS_ENABLED(CONFIG_PCI_P2PDMA) && is_zone_device_page(page) && page_pgmap(page)->type == MEMORY_DEVICE_PCI_P2PDMA; } static inline bool is_device_coherent_page(const struct page *page) { return is_zone_device_page(page) && page_pgmap(page)->type == MEMORY_DEVICE_COHERENT; } static inline bool folio_is_device_coherent(const struct folio *folio) { return is_device_coherent_page(&folio->page); } static inline bool is_fsdax_page(const struct page *page) { return is_zone_device_page(page) && page_pgmap(page)->type == MEMORY_DEVICE_FS_DAX; } static inline bool folio_is_fsdax(const struct folio *folio) { return is_fsdax_page(&folio->page); } #ifdef CONFIG_ZONE_DEVICE void zone_device_page_init(struct page *page); void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap); struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap); bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); unsigned long memremap_compat_align(void); #else static inline void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { /* * Fail attempts to call devm_memremap_pages() without * ZONE_DEVICE support enabled, this requires callers to fall * back to plain devm_memremap() based on config */ WARN_ON_ONCE(1); return ERR_PTR(-ENXIO); } static inline void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap) { } static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap) { return NULL; } static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn) { return false; } /* when memremap_pages() is disabled all archs can remap a single page */ static inline unsigned long memremap_compat_align(void) { return PAGE_SIZE; } #endif /* CONFIG_ZONE_DEVICE */ static inline void put_dev_pagemap(struct dev_pagemap *pgmap) { if (pgmap) percpu_ref_put(&pgmap->ref); } #endif /* _LINUX_MEMREMAP_H_ */
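The header above is normally consumed by a driver that fills in a struct dev_pagemap and hands it to devm_memremap_pages(). Below is a hedged sketch of that pattern for MEMORY_DEVICE_PRIVATE memory: my_probe(), my_pgmap_ops and the resource passed in are hypothetical, and the callback bodies are placeholders rather than a working migration implementation.

#include <linux/device.h>
#include <linux/err.h>
#include <linux/memremap.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>

/* Placeholder callbacks: a real driver migrates data and recycles blocks. */
static vm_fault_t my_migrate_to_ram(struct vm_fault *vmf)
{
	/* migrate the faulting device-private page back to system RAM */
	return VM_FAULT_SIGBUS;
}

static void my_page_free(struct page *page)
{
	/* return the backing device block to the driver's allocator */
}

static const struct dev_pagemap_ops my_pgmap_ops = {
	.page_free	= my_page_free,
	.migrate_to_ram	= my_migrate_to_ram,
};

static int my_probe(struct device *dev, struct resource *res)
{
	struct dev_pagemap *pgmap;
	void *addr;

	pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return -ENOMEM;

	pgmap->type = MEMORY_DEVICE_PRIVATE;
	pgmap->range.start = res->start;
	pgmap->range.end = res->end;
	pgmap->nr_range = 1;
	pgmap->ops = &my_pgmap_ops;
	pgmap->owner = THIS_MODULE;	/* any opaque, driver-unique tag */

	/* maps the range and creates struct pages; ERR_PTR() on failure */
	addr = devm_memremap_pages(dev, pgmap);
	return PTR_ERR_OR_ZERO(addr);
}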
/* SPDX-License-Identifier: GPL-2.0-only */ /* A pointer that can point to either kernel or userspace memory. */ #ifndef _LINUX_BPFPTR_H #define _LINUX_BPFPTR_H #include <linux/mm.h> #include <linux/sockptr.h> typedef sockptr_t bpfptr_t; static inline bool bpfptr_is_kernel(bpfptr_t bpfptr) { return bpfptr.is_kernel; } static inline bpfptr_t KERNEL_BPFPTR(void *p) { return (bpfptr_t) { .kernel = p, .is_kernel = true }; } static inline bpfptr_t USER_BPFPTR(void __user *p) { return (bpfptr_t) { .user = p }; } static inline bpfptr_t make_bpfptr(u64 addr, bool is_kernel) { if (is_kernel) return KERNEL_BPFPTR((void*) (uintptr_t) addr); else return USER_BPFPTR(u64_to_user_ptr(addr)); } static inline bool bpfptr_is_null(bpfptr_t bpfptr) { if (bpfptr_is_kernel(bpfptr)) return !bpfptr.kernel; return !bpfptr.user; } static inline void bpfptr_add(bpfptr_t *bpfptr, size_t val) { if (bpfptr_is_kernel(*bpfptr)) bpfptr->kernel += val; else bpfptr->user += val; } static inline int copy_from_bpfptr_offset(void *dst, bpfptr_t src, size_t offset, size_t size) { if (!bpfptr_is_kernel(src)) return copy_from_user(dst, src.user + offset, size); return copy_from_kernel_nofault(dst, src.kernel + offset, size); } static inline int copy_from_bpfptr(void *dst, bpfptr_t src, size_t size) { return copy_from_bpfptr_offset(dst, src, 0, size); } static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset, const void *src, size_t size) { return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size); } static inline void *kvmemdup_bpfptr_noprof(bpfptr_t src, size_t len) { void *p = kvmalloc_noprof(len, GFP_USER | __GFP_NOWARN); if (!p) return ERR_PTR(-ENOMEM); if (copy_from_bpfptr(p, src, len)) { kvfree(p); return ERR_PTR(-EFAULT); } return p; } #define kvmemdup_bpfptr(...) alloc_hooks(kvmemdup_bpfptr_noprof(__VA_ARGS__)) static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count) { if (bpfptr_is_kernel(src)) return strncpy_from_kernel_nofault(dst, src.kernel, count); return strncpy_from_user(dst, src.user, count); } #endif /* _LINUX_BPFPTR_H */
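bpfptr_t exists so that one copy path can serve both the bpf() syscall (a user pointer) and in-kernel callers such as light skeleton loaders (a kernel pointer). A minimal hedged sketch of that calling pattern follows; struct my_attr and load_my_attr() are made up for illustration and are not part of this header.

#include <linux/bpfptr.h>
#include <linux/errno.h>
#include <linux/types.h>

struct my_attr {
	u32 prog_fd;
	u32 flags;
};

/* Copy a fixed-size attribute from either user or kernel memory. */
static int load_my_attr(u64 uattr, bool from_kernel, struct my_attr *out)
{
	bpfptr_t ptr = make_bpfptr(uattr, from_kernel);

	if (bpfptr_is_null(ptr))
		return -EINVAL;

	/* copy_from_bpfptr() picks copy_from_user() or the nofault kernel copy */
	if (copy_from_bpfptr(out, ptr, sizeof(*out)))
		return -EFAULT;

	return 0;
}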
// SPDX-License-Identifier: GPL-2.0-or-later /* * Digital Audio (PCM) abstract layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/io.h> #include <linux/time.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/moduleparam.h> #include <linux/export.h> #include <sound/core.h> #include <sound/pcm.h> #include <sound/info.h> #include <sound/initval.h> #include "pcm_local.h" static int preallocate_dma = 1; module_param(preallocate_dma, int, 0444); MODULE_PARM_DESC(preallocate_dma, "Preallocate DMA memory when the PCM devices are initialized."); static int maximum_substreams = 4; module_param(maximum_substreams, int, 0444); MODULE_PARM_DESC(maximum_substreams, "Maximum substreams with preallocated DMA memory."); static const size_t snd_minimum_buffer = 16384; static unsigned long max_alloc_per_card = 32UL * 1024UL * 1024UL; module_param(max_alloc_per_card, ulong, 0644); MODULE_PARM_DESC(max_alloc_per_card, "Max total allocation bytes per card."); static void __update_allocated_size(struct snd_card *card, ssize_t bytes) { card->total_pcm_alloc_bytes += bytes; } static void update_allocated_size(struct snd_card *card, ssize_t bytes) { guard(mutex)(&card->memory_mutex); __update_allocated_size(card, bytes); } static void decrease_allocated_size(struct snd_card *card, size_t bytes) { guard(mutex)(&card->memory_mutex); WARN_ON(card->total_pcm_alloc_bytes < bytes); __update_allocated_size(card, -(ssize_t)bytes); } static int do_alloc_pages(struct snd_card *card, int type, struct device *dev, int str, size_t 
size, struct snd_dma_buffer *dmab) { enum dma_data_direction dir; int err; /* check and reserve the requested size */ scoped_guard(mutex, &card->memory_mutex) { if (max_alloc_per_card && card->total_pcm_alloc_bytes + size > max_alloc_per_card) return -ENOMEM; __update_allocated_size(card, size); } if (str == SNDRV_PCM_STREAM_PLAYBACK) dir = DMA_TO_DEVICE; else dir = DMA_FROM_DEVICE; err = snd_dma_alloc_dir_pages(type, dev, dir, size, dmab); if (!err) { /* the actual allocation size might be bigger than requested, * and we need to correct the account */ if (dmab->bytes != size) update_allocated_size(card, dmab->bytes - size); } else { /* take back on allocation failure */ decrease_allocated_size(card, size); } return err; } static void do_free_pages(struct snd_card *card, struct snd_dma_buffer *dmab) { if (!dmab->area) return; decrease_allocated_size(card, dmab->bytes); snd_dma_free_pages(dmab); dmab->area = NULL; } /* * try to allocate as the large pages as possible. * stores the resultant memory size in *res_size. * * the minimum size is snd_minimum_buffer. it should be power of 2. */ static int preallocate_pcm_pages(struct snd_pcm_substream *substream, size_t size, bool no_fallback) { struct snd_dma_buffer *dmab = &substream->dma_buffer; struct snd_card *card = substream->pcm->card; size_t orig_size = size; int err; do { err = do_alloc_pages(card, dmab->dev.type, dmab->dev.dev, substream->stream, size, dmab); if (err != -ENOMEM) return err; if (no_fallback) break; size >>= 1; } while (size >= snd_minimum_buffer); dmab->bytes = 0; /* tell error */ pr_warn("ALSA pcmC%dD%d%c,%d:%s: cannot preallocate for size %zu\n", substream->pcm->card->number, substream->pcm->device, substream->stream ? 'c' : 'p', substream->number, substream->pcm->name, orig_size); return -ENOMEM; } /** * snd_pcm_lib_preallocate_free - release the preallocated buffer of the specified substream. * @substream: the pcm substream instance * * Releases the pre-allocated buffer of the given substream. */ void snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream) { do_free_pages(substream->pcm->card, &substream->dma_buffer); } /** * snd_pcm_lib_preallocate_free_for_all - release all pre-allocated buffers on the pcm * @pcm: the pcm instance * * Releases all the pre-allocated buffers on the given pcm. */ void snd_pcm_lib_preallocate_free_for_all(struct snd_pcm *pcm) { struct snd_pcm_substream *substream; int stream; for_each_pcm_substream(pcm, stream, substream) snd_pcm_lib_preallocate_free(substream); } EXPORT_SYMBOL(snd_pcm_lib_preallocate_free_for_all); #ifdef CONFIG_SND_VERBOSE_PROCFS /* * read callback for prealloc proc file * * prints the current allocated size in kB. */ static void snd_pcm_lib_preallocate_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; snd_iprintf(buffer, "%lu\n", (unsigned long) substream->dma_buffer.bytes / 1024); } /* * read callback for prealloc_max proc file * * prints the maximum allowed size in kB. */ static void snd_pcm_lib_preallocate_max_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; snd_iprintf(buffer, "%lu\n", (unsigned long) substream->dma_max / 1024); } /* * write callback for prealloc proc file * * accepts the preallocation size in kB. 
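 *
 * For example, writing "64" to this file (typically exposed as
 * /proc/asound/cardC/pcmDp/subN/prealloc) requests a 64 kB buffer; the
 * value must be 0 or at least 8 (kB) and no larger than the prealloc_max
 * limit reported next to it.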
*/ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; struct snd_card *card = substream->pcm->card; char line[64], str[64]; unsigned long size; struct snd_dma_buffer new_dmab; guard(mutex)(&substream->pcm->open_mutex); if (substream->runtime) { buffer->error = -EBUSY; return; } if (!snd_info_get_line(buffer, line, sizeof(line))) { snd_info_get_str(str, line, sizeof(str)); buffer->error = kstrtoul(str, 10, &size); if (buffer->error != 0) return; size *= 1024; if ((size != 0 && size < 8192) || size > substream->dma_max) { buffer->error = -EINVAL; return; } if (substream->dma_buffer.bytes == size) return; memset(&new_dmab, 0, sizeof(new_dmab)); new_dmab.dev = substream->dma_buffer.dev; if (size > 0) { if (do_alloc_pages(card, substream->dma_buffer.dev.type, substream->dma_buffer.dev.dev, substream->stream, size, &new_dmab) < 0) { buffer->error = -ENOMEM; pr_debug("ALSA pcmC%dD%d%c,%d:%s: cannot preallocate for size %lu\n", substream->pcm->card->number, substream->pcm->device, substream->stream ? 'c' : 'p', substream->number, substream->pcm->name, size); return; } substream->buffer_bytes_max = size; } else { substream->buffer_bytes_max = UINT_MAX; } if (substream->dma_buffer.area) do_free_pages(card, &substream->dma_buffer); substream->dma_buffer = new_dmab; } else { buffer->error = -EINVAL; } } static inline void preallocate_info_init(struct snd_pcm_substream *substream) { struct snd_info_entry *entry; entry = snd_info_create_card_entry(substream->pcm->card, "prealloc", substream->proc_root); if (entry) { snd_info_set_text_ops(entry, substream, snd_pcm_lib_preallocate_proc_read); entry->c.text.write = snd_pcm_lib_preallocate_proc_write; entry->mode |= 0200; } entry = snd_info_create_card_entry(substream->pcm->card, "prealloc_max", substream->proc_root); if (entry) snd_info_set_text_ops(entry, substream, snd_pcm_lib_preallocate_max_proc_read); } #else /* !CONFIG_SND_VERBOSE_PROCFS */ static inline void preallocate_info_init(struct snd_pcm_substream *substream) { } #endif /* CONFIG_SND_VERBOSE_PROCFS */ /* * pre-allocate the buffer and create a proc file for the substream */ static int preallocate_pages(struct snd_pcm_substream *substream, int type, struct device *data, size_t size, size_t max, bool managed) { int err; if (snd_BUG_ON(substream->dma_buffer.dev.type)) return -EINVAL; substream->dma_buffer.dev.type = type; substream->dma_buffer.dev.dev = data; if (size > 0) { if (!max) { /* no fallback; a failed allocation returns -ENOMEM as-is */ err = preallocate_pcm_pages(substream, size, true); if (err < 0) return err; } else if (preallocate_dma && substream->number < maximum_substreams) { err = preallocate_pcm_pages(substream, size, false); if (err < 0 && err != -ENOMEM) return err; } } if (substream->dma_buffer.bytes > 0) substream->buffer_bytes_max = substream->dma_buffer.bytes; substream->dma_max = max; if (max > 0) preallocate_info_init(substream); if (managed) substream->managed_buffer_alloc = 1; return 0; } static int preallocate_pages_for_all(struct snd_pcm *pcm, int type, void *data, size_t size, size_t max, bool managed) { struct snd_pcm_substream *substream; int stream, err; for_each_pcm_substream(pcm, stream, substream) { err = preallocate_pages(substream, type, data, size, max, managed); if (err < 0) return err; } return 0; } /** * snd_pcm_lib_preallocate_pages - pre-allocation for the given DMA type * @substream: the pcm substream instance * @type: DMA type (SNDRV_DMA_TYPE_*) *
@data: DMA type dependent data * @size: the requested pre-allocation size in bytes * @max: the max. allowed pre-allocation size * * Do pre-allocation for the given DMA buffer type. */ void snd_pcm_lib_preallocate_pages(struct snd_pcm_substream *substream, int type, struct device *data, size_t size, size_t max) { preallocate_pages(substream, type, data, size, max, false); } EXPORT_SYMBOL(snd_pcm_lib_preallocate_pages); /** * snd_pcm_lib_preallocate_pages_for_all - pre-allocation for continuous memory type (all substreams) * @pcm: the pcm instance * @type: DMA type (SNDRV_DMA_TYPE_*) * @data: DMA type dependent data * @size: the requested pre-allocation size in bytes * @max: the max. allowed pre-allocation size * * Do pre-allocation to all substreams of the given pcm for the * specified DMA type. */ void snd_pcm_lib_preallocate_pages_for_all(struct snd_pcm *pcm, int type, void *data, size_t size, size_t max) { preallocate_pages_for_all(pcm, type, data, size, max, false); } EXPORT_SYMBOL(snd_pcm_lib_preallocate_pages_for_all); /** * snd_pcm_set_managed_buffer - set up buffer management for a substream * @substream: the pcm substream instance * @type: DMA type (SNDRV_DMA_TYPE_*) * @data: DMA type dependent data * @size: the requested pre-allocation size in bytes * @max: the max. allowed pre-allocation size * * Do pre-allocation for the given DMA buffer type, and set the managed * buffer allocation mode for the given substream. * In this mode, the PCM core will allocate a buffer automatically before the * PCM hw_params ops call, and release the buffer after the PCM hw_free ops * call as well, so that the driver doesn't need to invoke the allocation and * the release explicitly in its callbacks. * When a buffer is actually allocated before the PCM hw_params call, it * turns on the runtime buffer_changed flag so that drivers can update their * h/w parameters accordingly. * * When @size is non-zero and @max is zero, this tries to allocate only * the exact buffer size without fallback, and may return -ENOMEM; this is * the behavior of snd_pcm_set_fixed_buffer(). Otherwise, the function falls * back to allocating smaller chunks if the allocation fails. * * When both @size and @max are zero, the function only sets up the buffer * for later dynamic allocations. It's typically used for buffers of the * SNDRV_DMA_TYPE_VMALLOC type. * * Upon successful buffer allocation and setup, the function returns 0. * * Return: zero if successful, or a negative error code */ int snd_pcm_set_managed_buffer(struct snd_pcm_substream *substream, int type, struct device *data, size_t size, size_t max) { return preallocate_pages(substream, type, data, size, max, true); } EXPORT_SYMBOL(snd_pcm_set_managed_buffer); /** * snd_pcm_set_managed_buffer_all - set up buffer management for all substreams * @pcm: the pcm instance * @type: DMA type (SNDRV_DMA_TYPE_*) * @data: DMA type dependent data * @size: the requested pre-allocation size in bytes * @max: the max. allowed pre-allocation size * * Do pre-allocation to all substreams of the given pcm for the specified DMA * type and size, and set the managed_buffer_alloc flag for each substream.
* * Return: zero if successful, or a negative error code */ int snd_pcm_set_managed_buffer_all(struct snd_pcm *pcm, int type, struct device *data, size_t size, size_t max) { return preallocate_pages_for_all(pcm, type, data, size, max, true); } EXPORT_SYMBOL(snd_pcm_set_managed_buffer_all); /** * snd_pcm_lib_malloc_pages - allocate the DMA buffer * @substream: the substream to allocate the DMA buffer to * @size: the requested buffer size in bytes * * Allocates the DMA buffer on the BUS type given earlier to * snd_pcm_lib_preallocate_xxx_pages(). * * Return: 1 if the buffer is changed, 0 if not changed, or a negative * error code on failure. */ int snd_pcm_lib_malloc_pages(struct snd_pcm_substream *substream, size_t size) { struct snd_card *card; struct snd_pcm_runtime *runtime; struct snd_dma_buffer *dmab = NULL; if (PCM_RUNTIME_CHECK(substream)) return -EINVAL; if (snd_BUG_ON(substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_UNKNOWN)) return -EINVAL; runtime = substream->runtime; card = substream->pcm->card; if (runtime->dma_buffer_p) { /* perhaps we could free the overly large DMA memory region to save some space here, but keeping it costs less time */ if (runtime->dma_buffer_p->bytes >= size) { runtime->dma_bytes = size; return 0; /* ok, do not change */ } snd_pcm_lib_free_pages(substream); } if (substream->dma_buffer.area != NULL && substream->dma_buffer.bytes >= size) { dmab = &substream->dma_buffer; /* use the pre-allocated buffer */ } else { /* dma_max=0 means the fixed size preallocation */ if (substream->dma_buffer.area && !substream->dma_max) return -ENOMEM; dmab = kzalloc(sizeof(*dmab), GFP_KERNEL); if (!dmab) return -ENOMEM; dmab->dev = substream->dma_buffer.dev; if (do_alloc_pages(card, substream->dma_buffer.dev.type, substream->dma_buffer.dev.dev, substream->stream, size, dmab) < 0) { kfree(dmab); pr_debug("ALSA pcmC%dD%d%c,%d:%s: cannot allocate for size %zu\n", substream->pcm->card->number, substream->pcm->device, substream->stream ? 'c' : 'p', substream->number, substream->pcm->name, size); return -ENOMEM; } } snd_pcm_set_runtime_buffer(substream, dmab); runtime->dma_bytes = size; return 1; /* area was changed */ } EXPORT_SYMBOL(snd_pcm_lib_malloc_pages); /** * snd_pcm_lib_free_pages - release the allocated DMA buffer. * @substream: the substream to release the DMA buffer * * Releases the DMA buffer allocated via snd_pcm_lib_malloc_pages(). * * Return: Zero if successful, or a negative error code on failure. */ int snd_pcm_lib_free_pages(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; if (PCM_RUNTIME_CHECK(substream)) return -EINVAL; runtime = substream->runtime; if (runtime->dma_area == NULL) return 0; if (runtime->dma_buffer_p != &substream->dma_buffer) { struct snd_card *card = substream->pcm->card; /* it's a newly allocated buffer. release it now. */ do_free_pages(card, runtime->dma_buffer_p); kfree(runtime->dma_buffer_p); } snd_pcm_set_runtime_buffer(substream, NULL); return 0; } EXPORT_SYMBOL(snd_pcm_lib_free_pages);
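The kerneldoc above describes the managed buffer mode in prose; below is a minimal, illustrative sketch (not part of the file above) of how a driver's PCM-creation path might use snd_pcm_set_managed_buffer_all(). The struct my_chip, my_pcm_new() and the empty ops tables are hypothetical placeholders; only the snd_pcm_* calls and SNDRV_DMA_TYPE_DEV are real ALSA API.

#include <sound/core.h>
#include <sound/pcm.h>

/* hypothetical driver context; only the card pointer matters for this sketch */
struct my_chip {
	struct snd_card *card;
};

/* hypothetical ops tables; a real driver fills in open/close/hw_params/... */
static const struct snd_pcm_ops my_playback_ops = { /* callbacks omitted */ };
static const struct snd_pcm_ops my_capture_ops = { /* callbacks omitted */ };

static int my_pcm_new(struct my_chip *chip)
{
	struct snd_pcm *pcm;
	int err;

	err = snd_pcm_new(chip->card, "My PCM", 0, 1, 1, &pcm);
	if (err < 0)
		return err;
	pcm->private_data = chip;
	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &my_playback_ops);
	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &my_capture_ops);
	/*
	 * Pre-allocate 64 kB per substream and allow growth up to 1 MB; in
	 * managed mode the PCM core allocates the buffer before hw_params
	 * and frees it after hw_free, so the driver's callbacks do not call
	 * snd_pcm_lib_malloc_pages()/snd_pcm_lib_free_pages() themselves.
	 */
	return snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_DEV,
					      chip->card->dev,
					      64 * 1024, 1024 * 1024);
}

Passing max = 0 instead would give the fixed-size behavior of snd_pcm_set_fixed_buffer() described in the kerneldoc above: only the exact @size is attempted and -ENOMEM is returned on failure.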
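For contrast, a similarly hypothetical sketch of the older, non-managed pattern that the snd_pcm_lib_malloc_pages()/snd_pcm_lib_free_pages() kerneldoc above describes: the driver preallocates in probe (e.g. via snd_pcm_lib_preallocate_pages_for_all()) and then allocates and releases the runtime buffer explicitly in its own hw_params/hw_free callbacks; my_hw_params and my_hw_free are placeholder names.

#include <sound/pcm.h>
#include <sound/pcm_params.h>

/* hypothetical hw_params callback: size the buffer from the chosen hw params */
static int my_hw_params(struct snd_pcm_substream *substream,
			struct snd_pcm_hw_params *params)
{
	/* returns 1 if the buffer changed, 0 if reused, negative errno on error */
	return snd_pcm_lib_malloc_pages(substream,
					params_buffer_bytes(params));
}

/* hypothetical hw_free callback: drop the runtime buffer again */
static int my_hw_free(struct snd_pcm_substream *substream)
{
	return snd_pcm_lib_free_pages(substream);
}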
// SPDX-License-Identifier: GPL-2.0-only /* * Syscall interface to knfsd. * * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ #include <linux/slab.h> #include <linux/namei.h> #include <linux/ctype.h> #include <linux/fs_context.h> #include <linux/sunrpc/svcsock.h> #include <linux/lockd/lockd.h> #include <linux/sunrpc/addr.h> #include <linux/sunrpc/gss_api.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/svc.h> #include <linux/module.h> #include <linux/fsnotify.h> #include <linux/nfslocalio.h> #include "idmap.h" #include "nfsd.h" #include "cache.h" #include "state.h" #include "netns.h" #include "pnfs.h" #include "filecache.h" #include "trace.h" #include "netlink.h" /* * We have a single directory with several nodes in it. */ enum { NFSD_Root = 1, NFSD_List, NFSD_Export_Stats, NFSD_Export_features, NFSD_Fh, NFSD_FO_UnlockIP, NFSD_FO_UnlockFS, NFSD_Threads, NFSD_Pool_Threads, NFSD_Pool_Stats, NFSD_Reply_Cache_Stats, NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, NFSD_Filecache, NFSD_Leasetime, NFSD_Gracetime, NFSD_RecoveryDir, NFSD_V4EndGrace, NFSD_MaxReserved }; /* * write() for these nodes. */ static ssize_t write_filehandle(struct file *file, char *buf, size_t size); static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); static ssize_t write_threads(struct file *file, char *buf, size_t size); static ssize_t write_pool_threads(struct file *file, char *buf, size_t size); static ssize_t write_versions(struct file *file, char *buf, size_t size); static ssize_t write_ports(struct file *file, char *buf, size_t size); static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); #ifdef CONFIG_NFSD_V4 static ssize_t write_leasetime(struct file *file, char *buf, size_t size); static ssize_t write_gracetime(struct file *file, char *buf, size_t size); #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); #endif static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size); #endif static ssize_t (*const write_op[])(struct file *, char *, size_t) = { [NFSD_Fh] = write_filehandle, [NFSD_FO_UnlockIP] = write_unlock_ip, [NFSD_FO_UnlockFS] = write_unlock_fs, [NFSD_Threads] = write_threads, [NFSD_Pool_Threads] = write_pool_threads, [NFSD_Versions] = write_versions, [NFSD_Ports] = write_ports, [NFSD_MaxBlkSize] = write_maxblksize, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = write_leasetime, [NFSD_Gracetime] = write_gracetime, #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING [NFSD_RecoveryDir] = write_recoverydir, #endif [NFSD_V4EndGrace] = write_v4_end_grace, #endif }; static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) { ino_t ino = file_inode(file)->i_ino; char *data; ssize_t rv; if (ino >= ARRAY_SIZE(write_op) || !write_op[ino]) return -EINVAL; data = simple_transaction_get(file, buf, size); if (IS_ERR(data)) return PTR_ERR(data); rv = write_op[ino](file, data, size); if (rv < 0) return rv; simple_transaction_set(file, rv); return size; } static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) { if (!
file->private_data) { /* An attempt to read a transaction file without writing * causes a 0-byte write so that the file can return * state information */ ssize_t rv = nfsctl_transaction_write(file, buf, 0, pos); if (rv < 0) return rv; } return simple_transaction_read(file, buf, size, pos); } static const struct file_operations transaction_ops = { .write = nfsctl_transaction_write, .read = nfsctl_transaction_read, .release = simple_transaction_release, .llseek = default_llseek, }; static int exports_net_open(struct net *net, struct file *file) { int err; struct seq_file *seq; struct nfsd_net *nn = net_generic(net, nfsd_net_id); err = seq_open(file, &nfs_exports_op); if (err) return err; seq = file->private_data; seq->private = nn->svc_export_cache; return 0; } static int exports_nfsd_open(struct inode *inode, struct file *file) { return exports_net_open(inode->i_sb->s_fs_info, file); } static const struct file_operations exports_nfsd_operations = { .open = exports_nfsd_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; static int export_features_show(struct seq_file *m, void *v) { seq_printf(m, "0x%x 0x%x\n", NFSEXP_ALLFLAGS, NFSEXP_SECINFO_FLAGS); return 0; } DEFINE_SHOW_ATTRIBUTE(export_features); static int nfsd_pool_stats_open(struct inode *inode, struct file *file) { struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); return svc_pool_stats_open(&nn->nfsd_info, file); } static const struct file_operations pool_stats_operations = { .open = nfsd_pool_stats_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; DEFINE_SHOW_ATTRIBUTE(nfsd_reply_cache_stats); DEFINE_SHOW_ATTRIBUTE(nfsd_file_cache_stats); /*----------------------------------------------------------------------------*/ /* * payload - write methods */ static inline struct net *netns(struct file *file) { return file_inode(file)->i_sb->s_fs_info; } /* * write_unlock_ip - Release all locks used by a client * * Experimental. * * Input: * buf: '\n'-terminated C string containing a * presentation format IP address * size: length of C string in @buf * Output: * On success: returns zero if all specified locks were released; * returns one if one or more locks were not released * On error: return code is negative errno value */ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) { struct sockaddr_storage address; struct sockaddr *sap = (struct sockaddr *)&address; size_t salen = sizeof(address); char *fo_path; struct net *net = netns(file); /* sanity check */ if (size == 0) return -EINVAL; if (buf[size-1] != '\n') return -EINVAL; fo_path = buf; if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; if (rpc_pton(net, fo_path, size, sap, salen) == 0) return -EINVAL; trace_nfsd_ctl_unlock_ip(net, buf); return nlmsvc_unlock_all_by_ip(sap); } /* * write_unlock_fs - Release all locks on a local file system * * Experimental. 
* * Input: * buf: '\n'-terminated C string containing the * absolute pathname of a local file system * size: length of C string in @buf * Output: * On success: returns zero if all specified locks were released; * returns one if one or more locks were not released * On error: return code is negative errno value */ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size) { struct path path; char *fo_path; int error; /* sanity check */ if (size == 0) return -EINVAL; if (buf[size-1] != '\n') return -EINVAL; fo_path = buf; if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; trace_nfsd_ctl_unlock_fs(netns(file), fo_path); error = kern_path(fo_path, 0, &path); if (error) return error; /* * XXX: Needs better sanity checking. Otherwise we could end up * releasing locks on the wrong file system. * * For example: * 1. Does the path refer to a directory? * 2. Is that directory a mount point, or * 3. Is that directory the root of an exported file system? */ error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb); nfsd4_revoke_states(netns(file), path.dentry->d_sb); path_put(&path); return error; } /* * write_filehandle - Get a variable-length NFS file handle by path * * On input, the buffer contains a '\n'-terminated C string comprised of * three alphanumeric words separated by whitespace. The string may * contain escape sequences. * * Input: * buf: * domain: client domain name * path: export pathname * maxsize: numeric maximum size of * @buf * size: length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C * string containing an ASCII hex text version * of the NFS file handle; * return code is the size in bytes of the string * On error: return code is negative errno value */ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) { char *dname, *path; int maxsize; char *mesg = buf; int len; struct auth_domain *dom; struct knfsd_fh fh; if (size == 0) return -EINVAL; if (buf[size-1] != '\n') return -EINVAL; buf[size-1] = 0; dname = mesg; len = qword_get(&mesg, dname, size); if (len <= 0) return -EINVAL; path = dname+len+1; len = qword_get(&mesg, path, size); if (len <= 0) return -EINVAL; len = get_int(&mesg, &maxsize); if (len) return len; if (maxsize < NFS_FHSIZE) return -EINVAL; maxsize = min(maxsize, NFS3_FHSIZE); if (qword_get(&mesg, mesg, size) > 0) return -EINVAL; trace_nfsd_ctl_filehandle(netns(file), dname, path, maxsize); /* we have all the words, they are in buf..
*/ dom = unix_domain_find(dname); if (!dom) return -ENOMEM; len = exp_rootfh(netns(file), dom, path, &fh, maxsize); auth_domain_put(dom); if (len) return len; mesg = buf; len = SIMPLE_TRANSACTION_LIMIT; qword_addhex(&mesg, &len, fh.fh_raw, fh.fh_size); mesg[-1] = '\n'; return mesg - buf; } /* * write_threads - Start NFSD, or report the current number of running threads * * Input: * buf: ignored * size: zero * Output: * On success: passed-in buffer filled with '\n'-terminated C * string numeric value representing the number of * running NFSD threads; * return code is the size in bytes of the string * On error: return code is zero * * OR * * Input: * buf: C string containing an unsigned * integer value representing the * number of NFSD threads to start * size: non-zero length of C string in @buf * Output: * On success: NFS service is started; * passed-in buffer filled with '\n'-terminated C * string numeric value representing the number of * running NFSD threads; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ static ssize_t write_threads(struct file *file, char *buf, size_t size) { char *mesg = buf; int rv; struct net *net = netns(file); if (size > 0) { int newthreads; rv = get_int(&mesg, &newthreads); if (rv) return rv; if (newthreads < 0) return -EINVAL; trace_nfsd_ctl_threads(net, newthreads); mutex_lock(&nfsd_mutex); rv = nfsd_svc(1, &newthreads, net, file->f_cred, NULL); mutex_unlock(&nfsd_mutex); if (rv < 0) return rv; } else rv = nfsd_nrthreads(net); return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv); } /* * write_pool_threads - Set or report the current number of threads per pool * * Input: * buf: ignored * size: zero * * OR * * Input: * buf: C string containing whitespace- * separated unsigned integer values * representing the number of NFSD * threads to start in each pool * size: non-zero length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C * string containing integer values representing the * number of NFSD threads in each pool; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) { /* if size > 0, look for an array of number of threads per node * and apply them then write out number of threads per node as reply */ char *mesg = buf; int i; int rv; int len; int npools; int *nthreads; struct net *net = netns(file); mutex_lock(&nfsd_mutex); npools = nfsd_nrpools(net); if (npools == 0) { /* * NFS is shut down. The admin can start it by * writing to the threads file but NOT the pool_threads * file, sorry. Report zero threads. */ mutex_unlock(&nfsd_mutex); strcpy(buf, "0\n"); return strlen(buf); } nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL); rv = -ENOMEM; if (nthreads == NULL) goto out_free; if (size > 0) { for (i = 0; i < npools; i++) { rv = get_int(&mesg, &nthreads[i]); if (rv == -ENOENT) break; /* fewer numbers than pools */ if (rv) goto out_free; /* syntax error */ rv = -EINVAL; if (nthreads[i] < 0) goto out_free; trace_nfsd_ctl_pool_threads(net, i, nthreads[i]); } /* * There must always be a thread in pool 0; the admin * can't shut down NFS completely using pool_threads. 
*/ if (nthreads[0] == 0) nthreads[0] = 1; rv = nfsd_set_nrthreads(i, nthreads, net); if (rv) goto out_free; } rv = nfsd_get_nrthreads(npools, nthreads, net); if (rv) goto out_free; mesg = buf; size = SIMPLE_TRANSACTION_LIMIT; for (i = 0; i < npools && size > 0; i++) { snprintf(mesg, size, "%d%c", nthreads[i], (i == npools-1 ? '\n' : ' ')); len = strlen(mesg); size -= len; mesg += len; } rv = mesg - buf; out_free: kfree(nthreads); mutex_unlock(&nfsd_mutex); return rv; } static ssize_t nfsd_print_version_support(struct nfsd_net *nn, char *buf, int remaining, const char *sep, unsigned vers, int minor) { const char *format = minor < 0 ? "%s%c%u" : "%s%c%u.%u"; bool supported = !!nfsd_vers(nn, vers, NFSD_TEST); if (vers == 4 && minor >= 0 && !nfsd_minorversion(nn, minor, NFSD_TEST)) supported = false; if (minor == 0 && supported) /* * special case for backward compatibility. * +4.0 is never reported, it is implied by * +4, unless -4.0 is present. */ return 0; return snprintf(buf, remaining, format, sep, supported ? '+' : '-', vers, minor); } static ssize_t __write_versions(struct file *file, char *buf, size_t size) { char *mesg = buf; char *vers, *minorp, sign; int len, num, remaining; ssize_t tlen = 0; char *sep; struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); if (size > 0) { if (nn->nfsd_serv) /* Cannot change versions without updating * nn->nfsd_serv->sv_xdrsize, and reallocing * rq_argp and rq_resp */ return -EBUSY; if (buf[size-1] != '\n') return -EINVAL; buf[size-1] = 0; trace_nfsd_ctl_version(netns(file), buf); vers = mesg; len = qword_get(&mesg, vers, size); if (len <= 0) return -EINVAL; do { enum vers_op cmd; unsigned minor; sign = *vers; if (sign == '+' || sign == '-') num = simple_strtol((vers+1), &minorp, 0); else num = simple_strtol(vers, &minorp, 0); if (*minorp == '.') { if (num != 4) return -EINVAL; if (kstrtouint(minorp+1, 0, &minor) < 0) return -EINVAL; } cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET; switch(num) { #ifdef CONFIG_NFSD_V2 case 2: #endif case 3: nfsd_vers(nn, num, cmd); break; case 4: if (*minorp == '.') { if (nfsd_minorversion(nn, minor, cmd) < 0) return -EINVAL; } else if ((cmd == NFSD_SET) != nfsd_vers(nn, num, NFSD_TEST)) { /* * Either we have +4 and no minors are enabled, * or we have -4 and at least one minor is enabled. * In either case, propagate 'cmd' to all minors.
*/ minor = 0; while (nfsd_minorversion(nn, minor, cmd) >= 0) minor++; } break; default: /* Ignore requests to disable non-existent versions */ if (cmd == NFSD_SET) return -EINVAL; } vers += len + 1; } while ((len = qword_get(&mesg, vers, size)) > 0); /* If all get turned off, turn them back on, as * having no versions is BAD */ nfsd_reset_versions(nn); } /* Now write current state into reply buffer */ sep = ""; remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) { int minor; if (!nfsd_vers(nn, num, NFSD_AVAIL)) continue; minor = -1; do { len = nfsd_print_version_support(nn, buf, remaining, sep, num, minor); if (len >= remaining) goto out; remaining -= len; buf += len; tlen += len; minor++; if (len) sep = " "; } while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION); } out: len = snprintf(buf, remaining, "\n"); if (len >= remaining) return -EINVAL; return tlen + len; } /* * write_versions - Set or report the available NFS protocol versions * * Input: * buf: ignored * size: zero * Output: * On success: passed-in buffer filled with '\n'-terminated C * string containing positive or negative integer * values representing the current status of each * protocol version; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value * * OR * * Input: * buf: C string containing whitespace- * separated positive or negative * integer values representing NFS * protocol versions to enable ("+n") * or disable ("-n") * size: non-zero length of C string in @buf * Output: * On success: status of zero or more protocol versions has * been updated; passed-in buffer filled with * '\n'-terminated C string containing positive * or negative integer values representing the * current status of each protocol version; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ static ssize_t write_versions(struct file *file, char *buf, size_t size) { ssize_t rv; mutex_lock(&nfsd_mutex); rv = __write_versions(file, buf, size); mutex_unlock(&nfsd_mutex); return rv; } /* * Zero-length write. Return a list of NFSD's current listener * transports. */ static ssize_t __write_ports_names(char *buf, struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (nn->nfsd_serv == NULL) return 0; return svc_xprt_names(nn->nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); } /* * A single 'fd' number was written, in which case it must be for * a socket of a supported family/protocol, and we use it as an * nfsd listener. */ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred *cred) { char *mesg = buf; int fd, err; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv; err = get_int(&mesg, &fd); if (err != 0 || fd < 0) return -EINVAL; trace_nfsd_ctl_ports_addfd(net, fd); err = nfsd_create_serv(net); if (err != 0) return err; serv = nn->nfsd_serv; err = svc_addsock(serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks)) nfsd_destroy_serv(net); return err; } /* * A transport listener is added by writing its transport name and * a port number. 
*/ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cred *cred) { char transport[16]; struct svc_xprt *xprt; int port, err; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv; if (sscanf(buf, "%15s %5u", transport, &port) != 2) return -EINVAL; if (port < 1 || port > USHRT_MAX) return -EINVAL; trace_nfsd_ctl_ports_addxprt(net, transport, port); err = nfsd_create_serv(net); if (err != 0) return err; serv = nn->nfsd_serv; err = svc_xprt_create(serv, transport, net, PF_INET, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0) goto out_err; err = svc_xprt_create(serv, transport, net, PF_INET6, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; return 0; out_close: xprt = svc_find_xprt(serv, transport, net, PF_INET, port); if (xprt != NULL) { svc_xprt_close(xprt); svc_xprt_put(xprt); } out_err: if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks)) nfsd_destroy_serv(net); return err; } static ssize_t __write_ports(struct file *file, char *buf, size_t size, struct net *net) { if (size == 0) return __write_ports_names(buf, net); if (isdigit(buf[0])) return __write_ports_addfd(buf, net, file->f_cred); if (isalpha(buf[0])) return __write_ports_addxprt(buf, net, file->f_cred); return -EINVAL; } /* * write_ports - Pass a socket file descriptor or transport name to listen on * * Input: * buf: ignored * size: zero * Output: * On success: passed-in buffer filled with a '\n'-terminated C * string containing a whitespace-separated list of * named NFSD listeners; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value * * OR * * Input: * buf: C string containing an unsigned * integer value representing a bound * but unconnected socket that is to be * used as an NFSD listener; listen(3) * must be called for a SOCK_STREAM * socket, otherwise it is ignored * size: non-zero length of C string in @buf * Output: * On success: NFS service is started; * passed-in buffer filled with a '\n'-terminated C * string containing a unique alphanumeric name of * the listener; * return code is the size in bytes of the string * On error: return code is a negative errno value * * OR * * Input: * buf: C string containing a transport * name and an unsigned integer value * representing the port to listen on, * separated by whitespace * size: non-zero length of C string in @buf * Output: * On success: returns zero; NFS service is started * On error: return code is a negative errno value */ static ssize_t write_ports(struct file *file, char *buf, size_t size) { ssize_t rv; mutex_lock(&nfsd_mutex); rv = __write_ports(file, buf, size, netns(file)); mutex_unlock(&nfsd_mutex); return rv; } int nfsd_max_blksize; /* * write_maxblksize - Set or report the current NFS blksize * * Input: * buf: ignored * size: zero * * OR * * Input: * buf: C string containing an unsigned * integer value representing the new * NFS blksize * size: non-zero length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C string * containing numeric value of the current NFS blksize * setting; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) { char *mesg = buf; struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); if (size > 0) { int bsize; int rv = get_int(&mesg, &bsize); if (rv) return rv; trace_nfsd_ctl_maxblksize(netns(file), bsize); /* force 
bsize into allowed range and * required alignment. */ bsize = max_t(int, bsize, 1024); bsize = min_t(int, bsize, NFSSVC_MAXBLKSIZE); bsize &= ~(1024-1); mutex_lock(&nfsd_mutex); if (nn->nfsd_serv) { mutex_unlock(&nfsd_mutex); return -EBUSY; } nfsd_max_blksize = bsize; mutex_unlock(&nfsd_mutex); } return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", nfsd_max_blksize); } #ifdef CONFIG_NFSD_V4 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time64_t *time, struct nfsd_net *nn) { struct dentry *dentry = file_dentry(file); char *mesg = buf; int rv, i; if (size > 0) { if (nn->nfsd_serv) return -EBUSY; rv = get_int(&mesg, &i); if (rv) return rv; trace_nfsd_ctl_time(netns(file), dentry->d_name.name, dentry->d_name.len, i); /* * Some sanity checking. We don't have a reason for * these particular numbers, but problems with the * extremes are: * - Too short: the briefest network outage may * cause clients to lose all their locks. Also, * the frequent polling may be wasteful. * - Too long: do you really want reboot recovery * to take more than an hour? Or to make other * clients wait an hour before being able to * revoke a dead client's locks? */ if (i < 10 || i > 3600) return -EINVAL; *time = i; } return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%lld\n", *time); } static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time64_t *time, struct nfsd_net *nn) { ssize_t rv; mutex_lock(&nfsd_mutex); rv = __nfsd4_write_time(file, buf, size, time, nn); mutex_unlock(&nfsd_mutex); return rv; } /* * write_leasetime - Set or report the current NFSv4 lease time * * Input: * buf: ignored * size: zero * * OR * * Input: * buf: C string containing an unsigned * integer value representing the new * NFSv4 lease expiry time * size: non-zero length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C * string containing unsigned integer value of the * current lease expiry time; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) { struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } /* * write_gracetime - Set or report current NFSv4 grace period time * * As above, but sets the time of the NFSv4 grace period. * * Note this should never be set to less than the *previous* * lease-period time, but we don't try to enforce this. (In the common * case (a new boot), we don't know what the previous lease time was * anyway.) 
*/ static ssize_t write_gracetime(struct file *file, char *buf, size_t size) { struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn); } #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, struct nfsd_net *nn) { char *mesg = buf; char *recdir; int len, status; if (size > 0) { if (nn->nfsd_serv) return -EBUSY; if (size > PATH_MAX || buf[size-1] != '\n') return -EINVAL; buf[size-1] = 0; recdir = mesg; len = qword_get(&mesg, recdir, size); if (len <= 0) return -EINVAL; trace_nfsd_ctl_recoverydir(netns(file), recdir); status = nfs4_reset_recoverydir(recdir); if (status) return status; } return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n", nfs4_recoverydir()); } /* * write_recoverydir - Set or report the pathname of the recovery directory * * Input: * buf: ignored * size: zero * * OR * * Input: * buf: C string containing the pathname * of the directory on a local file * system containing permanent NFSv4 * recovery data * size: non-zero length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C string * containing the current recovery pathname setting; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) { ssize_t rv; struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); mutex_lock(&nfsd_mutex); rv = __write_recoverydir(file, buf, size, nn); mutex_unlock(&nfsd_mutex); return rv; } #endif /* * write_v4_end_grace - release grace period for nfsd's v4.x lock manager * * Input: * buf: ignored * size: zero * OR * * Input: * buf: any value * size: non-zero length of C string in @buf * Output: * passed-in buffer filled with "Y" or "N" with a newline * and NULL-terminated C string. This indicates whether * the grace period has ended in the current net * namespace. Return code is the size in bytes of the * string. Writing a string that starts with 'Y', 'y', or * '1' to the file will end the grace period for nfsd's v4 * lock manager. */ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) { struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); if (size > 0) { switch(buf[0]) { case 'Y': case 'y': case '1': if (!nn->nfsd_serv) return -EBUSY; trace_nfsd_end_grace(netns(file)); nfsd4_end_grace(nn); break; default: return -EINVAL; } } return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%c\n", nn->grace_ended ? 'Y' : 'N'); } #endif /*----------------------------------------------------------------------------*/ /* * populating the filesystem. */ /* Basically copying rpc_get_inode. 
*/ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode) { struct inode *inode = new_inode(sb); if (!inode) return NULL; /* Following advice from simple_fill_super documentation: */ inode->i_ino = iunique(sb, NFSD_MaxReserved); inode->i_mode = mode; simple_inode_init_ts(inode); switch (mode & S_IFMT) { case S_IFDIR: inode->i_fop = &simple_dir_operations; inode->i_op = &simple_dir_inode_operations; inc_nlink(inode); break; case S_IFLNK: inode->i_op = &simple_symlink_inode_operations; break; default: break; } return inode; } static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, struct nfsdfs_client *ncl) { struct inode *inode; inode = nfsd_get_inode(dir->i_sb, mode); if (!inode) return -ENOMEM; if (ncl) { inode->i_private = ncl; kref_get(&ncl->cl_ref); } d_add(dentry, inode); inc_nlink(dir); fsnotify_mkdir(dir, dentry); return 0; } static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *ncl, char *name) { struct inode *dir = parent->d_inode; struct dentry *dentry; int ret = -ENOMEM; inode_lock(dir); dentry = d_alloc_name(parent, name); if (!dentry) goto out_err; ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600, ncl); if (ret) goto out_err; out: inode_unlock(dir); return dentry; out_err: dput(dentry); dentry = ERR_PTR(ret); goto out; } #if IS_ENABLED(CONFIG_SUNRPC_GSS) static int __nfsd_symlink(struct inode *dir, struct dentry *dentry, umode_t mode, const char *content) { struct inode *inode; inode = nfsd_get_inode(dir->i_sb, mode); if (!inode) return -ENOMEM; inode->i_link = (char *)content; inode->i_size = strlen(content); d_add(dentry, inode); inc_nlink(dir); fsnotify_create(dir, dentry); return 0; } /* * @content is assumed to be a NUL-terminated string that lives * longer than the symlink itself. */ static void _nfsd_symlink(struct dentry *parent, const char *name, const char *content) { struct inode *dir = parent->d_inode; struct dentry *dentry; int ret; inode_lock(dir); dentry = d_alloc_name(parent, name); if (!dentry) goto out; ret = __nfsd_symlink(d_inode(parent), dentry, S_IFLNK | 0777, content); if (ret) dput(dentry); out: inode_unlock(dir); } #else static inline void _nfsd_symlink(struct dentry *parent, const char *name, const char *content) { } #endif static void clear_ncl(struct dentry *dentry) { struct inode *inode = d_inode(dentry); struct nfsdfs_client *ncl = inode->i_private; spin_lock(&inode->i_lock); inode->i_private = NULL; spin_unlock(&inode->i_lock); kref_put(&ncl->cl_ref, ncl->cl_release); } struct nfsdfs_client *get_nfsdfs_client(struct inode *inode) { struct nfsdfs_client *nc; spin_lock(&inode->i_lock); nc = inode->i_private; if (nc) kref_get(&nc->cl_ref); spin_unlock(&inode->i_lock); return nc; } /* XXX: cut'n'paste from simple_fill_super; figure out if we could share * code instead. 
*/ static int nfsdfs_create_files(struct dentry *root, const struct tree_descr *files, struct nfsdfs_client *ncl, struct dentry **fdentries) { struct inode *dir = d_inode(root); struct inode *inode; struct dentry *dentry; int i; inode_lock(dir); for (i = 0; files->name && files->name[0]; i++, files++) { dentry = d_alloc_name(root, files->name); if (!dentry) goto out; inode = nfsd_get_inode(d_inode(root)->i_sb, S_IFREG | files->mode); if (!inode) { dput(dentry); goto out; } kref_get(&ncl->cl_ref); inode->i_fop = files->ops; inode->i_private = ncl; d_add(dentry, inode); fsnotify_create(dir, dentry); if (fdentries) fdentries[i] = dentry; } inode_unlock(dir); return 0; out: inode_unlock(dir); return -ENOMEM; } /* on success, returns positive number unique to that client. */ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, struct nfsdfs_client *ncl, u32 id, const struct tree_descr *files, struct dentry **fdentries) { struct dentry *dentry; char name[11]; int ret; sprintf(name, "%u", id); dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name); if (IS_ERR(dentry)) /* XXX: tossing errors? */ return NULL; ret = nfsdfs_create_files(dentry, files, ncl, fdentries); if (ret) { nfsd_client_rmdir(dentry); return NULL; } return dentry; } /* Taken from __rpc_rmdir: */ void nfsd_client_rmdir(struct dentry *dentry) { simple_recursive_removal(dentry, clear_ncl); } static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) { struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); struct dentry *dentry; int ret; static const struct tree_descr nfsd_files[] = { [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, /* Per-export io stats use same ops as exports file */ [NFSD_Export_Stats] = {"export_stats", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_fops, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_FO_UnlockFS] = {"unlock_filesystem", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &nfsd_reply_cache_stats_fops, S_IRUGO}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO}, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, #endif [NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO}, #endif /* last one */ {""} }; ret = simple_fill_super(sb, 0x6e667364, nfsd_files); if (ret) return ret; _nfsd_symlink(sb->s_root, "supported_krb5_enctypes", "/proc/net/rpc/gss_krb5_enctypes"); dentry = nfsd_mkdir(sb->s_root, NULL, "clients"); if (IS_ERR(dentry)) return PTR_ERR(dentry); nn->nfsd_client_dir = dentry; return 0; } static int nfsd_fs_get_tree(struct fs_context *fc) { return get_tree_keyed(fc, nfsd_fill_super, get_net(fc->net_ns)); } static void nfsd_fs_free_fc(struct fs_context *fc) { if (fc->s_fs_info) 
put_net(fc->s_fs_info); } static const struct fs_context_operations nfsd_fs_context_ops = { .free = nfsd_fs_free_fc, .get_tree = nfsd_fs_get_tree, }; static int nfsd_init_fs_context(struct fs_context *fc) { put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(fc->net_ns->user_ns); fc->ops = &nfsd_fs_context_ops; return 0; } static void nfsd_umount(struct super_block *sb) { struct net *net = sb->s_fs_info; nfsd_shutdown_threads(net); kill_litter_super(sb); put_net(net); } static struct file_system_type nfsd_fs_type = { .owner = THIS_MODULE, .name = "nfsd", .init_fs_context = nfsd_init_fs_context, .kill_sb = nfsd_umount, }; MODULE_ALIAS_FS("nfsd"); #ifdef CONFIG_PROC_FS static int exports_proc_open(struct inode *inode, struct file *file) { return exports_net_open(current->nsproxy->net_ns, file); } static const struct proc_ops exports_proc_ops = { .proc_open = exports_proc_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = seq_release, }; static int create_proc_exports_entry(void) { struct proc_dir_entry *entry; entry = proc_mkdir("fs/nfs", NULL); if (!entry) return -ENOMEM; entry = proc_create("exports", 0, entry, &exports_proc_ops); if (!entry) { remove_proc_entry("fs/nfs", NULL); return -ENOMEM; } return 0; } #else /* CONFIG_PROC_FS */ static int create_proc_exports_entry(void) { return 0; } #endif unsigned int nfsd_net_id; static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, struct netlink_callback *cb, struct nfsd_genl_rqstp *rqstp) { void *hdr; u32 i; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &nfsd_nl_family, 0, NFSD_CMD_RPC_STATUS_GET); if (!hdr) return -ENOBUFS; if (nla_put_be32(skb, NFSD_A_RPC_STATUS_XID, rqstp->rq_xid) || nla_put_u32(skb, NFSD_A_RPC_STATUS_FLAGS, rqstp->rq_flags) || nla_put_u32(skb, NFSD_A_RPC_STATUS_PROG, rqstp->rq_prog) || nla_put_u32(skb, NFSD_A_RPC_STATUS_PROC, rqstp->rq_proc) || nla_put_u8(skb, NFSD_A_RPC_STATUS_VERSION, rqstp->rq_vers) || nla_put_s64(skb, NFSD_A_RPC_STATUS_SERVICE_TIME, ktime_to_us(rqstp->rq_stime), NFSD_A_RPC_STATUS_PAD)) return -ENOBUFS; switch (rqstp->rq_saddr.sa_family) { case AF_INET: { const struct sockaddr_in *s_in, *d_in; s_in = (const struct sockaddr_in *)&rqstp->rq_saddr; d_in = (const struct sockaddr_in *)&rqstp->rq_daddr; if (nla_put_in_addr(skb, NFSD_A_RPC_STATUS_SADDR4, s_in->sin_addr.s_addr) || nla_put_in_addr(skb, NFSD_A_RPC_STATUS_DADDR4, d_in->sin_addr.s_addr) || nla_put_be16(skb, NFSD_A_RPC_STATUS_SPORT, s_in->sin_port) || nla_put_be16(skb, NFSD_A_RPC_STATUS_DPORT, d_in->sin_port)) return -ENOBUFS; break; } case AF_INET6: { const struct sockaddr_in6 *s_in, *d_in; s_in = (const struct sockaddr_in6 *)&rqstp->rq_saddr; d_in = (const struct sockaddr_in6 *)&rqstp->rq_daddr; if (nla_put_in6_addr(skb, NFSD_A_RPC_STATUS_SADDR6, &s_in->sin6_addr) || nla_put_in6_addr(skb, NFSD_A_RPC_STATUS_DADDR6, &d_in->sin6_addr) || nla_put_be16(skb, NFSD_A_RPC_STATUS_SPORT, s_in->sin6_port) || nla_put_be16(skb, NFSD_A_RPC_STATUS_DPORT, d_in->sin6_port)) return -ENOBUFS; break; } } for (i = 0; i < rqstp->rq_opcnt; i++) if (nla_put_u32(skb, NFSD_A_RPC_STATUS_COMPOUND_OPS, rqstp->rq_opnum[i])) return -ENOBUFS; genlmsg_end(skb, hdr); return 0; } /** * nfsd_nl_rpc_status_get_dumpit - Handle rpc_status_get dumpit * @skb: reply buffer * @cb: netlink metadata and command arguments * * Returns the size of the reply or a negative errno. 
*/ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { int i, ret, rqstp_index = 0; struct nfsd_net *nn; mutex_lock(&nfsd_mutex); nn = net_generic(sock_net(skb->sk), nfsd_net_id); if (!nn->nfsd_serv) { ret = -ENODEV; goto out_unlock; } rcu_read_lock(); for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) { struct svc_rqst *rqstp; if (i < cb->args[0]) /* already consumed */ continue; rqstp_index = 0; list_for_each_entry_rcu(rqstp, &nn->nfsd_serv->sv_pools[i].sp_all_threads, rq_all) { struct nfsd_genl_rqstp genl_rqstp; unsigned int status_counter; if (rqstp_index++ < cb->args[1]) /* already consumed */ continue; /* * Acquire rq_status_counter before parsing the rqst * fields. rq_status_counter is set to an odd value in * order to notify the consumers the rqstp fields are * meaningful. */ status_counter = smp_load_acquire(&rqstp->rq_status_counter); if (!(status_counter & 1)) continue; genl_rqstp.rq_xid = rqstp->rq_xid; genl_rqstp.rq_flags = rqstp->rq_flags; genl_rqstp.rq_vers = rqstp->rq_vers; genl_rqstp.rq_prog = rqstp->rq_prog; genl_rqstp.rq_proc = rqstp->rq_proc; genl_rqstp.rq_stime = rqstp->rq_stime; genl_rqstp.rq_opcnt = 0; memcpy(&genl_rqstp.rq_daddr, svc_daddr(rqstp), sizeof(struct sockaddr)); memcpy(&genl_rqstp.rq_saddr, svc_addr(rqstp), sizeof(struct sockaddr)); #ifdef CONFIG_NFSD_V4 if (rqstp->rq_vers == NFS4_VERSION && rqstp->rq_proc == NFSPROC4_COMPOUND) { /* NFSv4 compound */ struct nfsd4_compoundargs *args; int j; args = rqstp->rq_argp; genl_rqstp.rq_opcnt = args->opcnt; for (j = 0; j < genl_rqstp.rq_opcnt; j++) genl_rqstp.rq_opnum[j] = args->ops[j].opnum; } #endif /* CONFIG_NFSD_V4 */ /* * Acquire rq_status_counter before reporting the rqst * fields to the user. */ if (smp_load_acquire(&rqstp->rq_status_counter) != status_counter) continue; ret = nfsd_genl_rpc_status_compose_msg(skb, cb, &genl_rqstp); if (ret) goto out; } } cb->args[0] = i; cb->args[1] = rqstp_index; ret = skb->len; out: rcu_read_unlock(); out_unlock: mutex_unlock(&nfsd_mutex); return ret; } /** * nfsd_nl_threads_set_doit - set the number of running threads * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. 
*/ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) { int *nthreads, nrpools = 0, i, ret = -EOPNOTSUPP, rem; struct net *net = genl_info_net(info); struct nfsd_net *nn = net_generic(net, nfsd_net_id); const struct nlattr *attr; const char *scope = NULL; if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_THREADS)) return -EINVAL; /* count number of SERVER_THREADS values */ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { if (nla_type(attr) == NFSD_A_SERVER_THREADS) nrpools++; } mutex_lock(&nfsd_mutex); nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL); if (!nthreads) { ret = -ENOMEM; goto out_unlock; } i = 0; nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { if (nla_type(attr) == NFSD_A_SERVER_THREADS) { nthreads[i++] = nla_get_u32(attr); if (i >= nrpools) break; } } if (info->attrs[NFSD_A_SERVER_GRACETIME] || info->attrs[NFSD_A_SERVER_LEASETIME] || info->attrs[NFSD_A_SERVER_SCOPE]) { ret = -EBUSY; if (nn->nfsd_serv && nn->nfsd_serv->sv_nrthreads) goto out_unlock; ret = -EINVAL; attr = info->attrs[NFSD_A_SERVER_GRACETIME]; if (attr) { u32 gracetime = nla_get_u32(attr); if (gracetime < 10 || gracetime > 3600) goto out_unlock; nn->nfsd4_grace = gracetime; } attr = info->attrs[NFSD_A_SERVER_LEASETIME]; if (attr) { u32 leasetime = nla_get_u32(attr); if (leasetime < 10 || leasetime > 3600) goto out_unlock; nn->nfsd4_lease = leasetime; } attr = info->attrs[NFSD_A_SERVER_SCOPE]; if (attr) scope = nla_data(attr); } ret = nfsd_svc(nrpools, nthreads, net, get_current_cred(), scope); if (ret > 0) ret = 0; out_unlock: mutex_unlock(&nfsd_mutex); kfree(nthreads); return ret; } /** * nfsd_nl_threads_get_doit - get the number of running threads * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. */ int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct nfsd_net *nn = net_generic(net, nfsd_net_id); void *hdr; int err; skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; hdr = genlmsg_iput(skb, info); if (!hdr) { err = -EMSGSIZE; goto err_free_msg; } mutex_lock(&nfsd_mutex); err = nla_put_u32(skb, NFSD_A_SERVER_GRACETIME, nn->nfsd4_grace) || nla_put_u32(skb, NFSD_A_SERVER_LEASETIME, nn->nfsd4_lease) || nla_put_string(skb, NFSD_A_SERVER_SCOPE, nn->nfsd_name); if (err) goto err_unlock; if (nn->nfsd_serv) { int i; for (i = 0; i < nfsd_nrpools(net); ++i) { struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i]; err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, sp->sp_nrthreads); if (err) goto err_unlock; } } else { err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, 0); if (err) goto err_unlock; } mutex_unlock(&nfsd_mutex); genlmsg_end(skb, hdr); return genlmsg_reply(skb, info); err_unlock: mutex_unlock(&nfsd_mutex); err_free_msg: nlmsg_free(skb); return err; } /** * nfsd_nl_version_set_doit - set the nfs enabled versions * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. */ int nfsd_nl_version_set_doit(struct sk_buff *skb, struct genl_info *info) { const struct nlattr *attr; struct nfsd_net *nn; int i, rem; if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_PROTO_VERSION)) return -EINVAL; mutex_lock(&nfsd_mutex); nn = net_generic(genl_info_net(info), nfsd_net_id); if (nn->nfsd_serv) { mutex_unlock(&nfsd_mutex); return -EBUSY; } /* clear current supported versions. 
*/ nfsd_vers(nn, 2, NFSD_CLEAR); nfsd_vers(nn, 3, NFSD_CLEAR); for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) nfsd_minorversion(nn, i, NFSD_CLEAR); nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { struct nlattr *tb[NFSD_A_VERSION_MAX + 1]; u32 major, minor = 0; bool enabled; if (nla_type(attr) != NFSD_A_SERVER_PROTO_VERSION) continue; if (nla_parse_nested(tb, NFSD_A_VERSION_MAX, attr, nfsd_version_nl_policy, info->extack) < 0) continue; if (!tb[NFSD_A_VERSION_MAJOR]) continue; major = nla_get_u32(tb[NFSD_A_VERSION_MAJOR]); if (tb[NFSD_A_VERSION_MINOR]) minor = nla_get_u32(tb[NFSD_A_VERSION_MINOR]); enabled = nla_get_flag(tb[NFSD_A_VERSION_ENABLED]); switch (major) { case 4: nfsd_minorversion(nn, minor, enabled ? NFSD_SET : NFSD_CLEAR); break; case 3: case 2: if (!minor) nfsd_vers(nn, major, enabled ? NFSD_SET : NFSD_CLEAR); break; default: break; } } mutex_unlock(&nfsd_mutex); return 0; } /** * nfsd_nl_version_get_doit - get the enabled status for all supported nfs versions * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. */ int nfsd_nl_version_get_doit(struct sk_buff *skb, struct genl_info *info) { struct nfsd_net *nn; int i, err; void *hdr; skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; hdr = genlmsg_iput(skb, info); if (!hdr) { err = -EMSGSIZE; goto err_free_msg; } mutex_lock(&nfsd_mutex); nn = net_generic(genl_info_net(info), nfsd_net_id); for (i = 2; i <= 4; i++) { int j; for (j = 0; j <= NFSD_SUPPORTED_MINOR_VERSION; j++) { struct nlattr *attr; /* Don't record any versions the kernel doesn't have * compiled in */ if (!nfsd_support_version(i)) continue; /* NFSv{2,3} does not support minor numbers */ if (i < 4 && j) continue; attr = nla_nest_start(skb, NFSD_A_SERVER_PROTO_VERSION); if (!attr) { err = -EINVAL; goto err_nfsd_unlock; } if (nla_put_u32(skb, NFSD_A_VERSION_MAJOR, i) || nla_put_u32(skb, NFSD_A_VERSION_MINOR, j)) { err = -EINVAL; goto err_nfsd_unlock; } /* Set the enabled flag if the version is enabled */ if (nfsd_vers(nn, i, NFSD_TEST) && (i < 4 || nfsd_minorversion(nn, j, NFSD_TEST)) && nla_put_flag(skb, NFSD_A_VERSION_ENABLED)) { err = -EINVAL; goto err_nfsd_unlock; } nla_nest_end(skb, attr); } } mutex_unlock(&nfsd_mutex); genlmsg_end(skb, hdr); return genlmsg_reply(skb, info); err_nfsd_unlock: mutex_unlock(&nfsd_mutex); err_free_msg: nlmsg_free(skb); return err; } /** * nfsd_nl_listener_set_doit - set the nfs running sockets * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. 
*/ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct svc_xprt *xprt, *tmp; const struct nlattr *attr; struct svc_serv *serv; LIST_HEAD(permsocks); struct nfsd_net *nn; bool delete = false; int err, rem; mutex_lock(&nfsd_mutex); err = nfsd_create_serv(net); if (err) { mutex_unlock(&nfsd_mutex); return err; } nn = net_generic(net, nfsd_net_id); serv = nn->nfsd_serv; spin_lock_bh(&serv->sv_lock); /* Move all of the old listener sockets to a temp list */ list_splice_init(&serv->sv_permsocks, &permsocks); /* * Walk the list of server_socks from userland and move any that match * back to sv_permsocks */ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { struct nlattr *tb[NFSD_A_SOCK_MAX + 1]; const char *xcl_name; struct sockaddr *sa; if (nla_type(attr) != NFSD_A_SERVER_SOCK_ADDR) continue; if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr, nfsd_sock_nl_policy, info->extack) < 0) continue; if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME]) continue; if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa)) continue; xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]); sa = nla_data(tb[NFSD_A_SOCK_ADDR]); /* Put back any matching sockets */ list_for_each_entry_safe(xprt, tmp, &permsocks, xpt_list) { /* This shouldn't be possible */ if (WARN_ON_ONCE(xprt->xpt_net != net)) { list_move(&xprt->xpt_list, &serv->sv_permsocks); continue; } /* If everything matches, put it back */ if (!strcmp(xprt->xpt_class->xcl_name, xcl_name) && rpc_cmp_addr_port(sa, (struct sockaddr *)&xprt->xpt_local)) { list_move(&xprt->xpt_list, &serv->sv_permsocks); break; } } } /* * If there are listener transports remaining on the permsocks list, * it means we were asked to remove a listener. */ if (!list_empty(&permsocks)) { list_splice_init(&permsocks, &serv->sv_permsocks); delete = true; } spin_unlock_bh(&serv->sv_lock); /* Do not remove listeners while there are active threads. */ if (serv->sv_nrthreads) { err = -EBUSY; goto out_unlock_mtx; } /* * Since we can't delete an arbitrary llist entry, destroy the * remaining listeners and recreate the list. */ if (delete) svc_xprt_destroy_all(serv, net); /* walk list of addrs again, open any that still don't exist */ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { struct nlattr *tb[NFSD_A_SOCK_MAX + 1]; const char *xcl_name; struct sockaddr *sa; int ret; if (nla_type(attr) != NFSD_A_SERVER_SOCK_ADDR) continue; if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr, nfsd_sock_nl_policy, info->extack) < 0) continue; if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME]) continue; if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa)) continue; xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]); sa = nla_data(tb[NFSD_A_SOCK_ADDR]); xprt = svc_find_listener(serv, xcl_name, net, sa); if (xprt) { if (delete) WARN_ONCE(1, "Transport type=%s already exists\n", xcl_name); svc_xprt_put(xprt); continue; } ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0, get_current_cred()); /* always save the latest error */ if (ret < 0) err = ret; } if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks)) nfsd_destroy_serv(net); out_unlock_mtx: mutex_unlock(&nfsd_mutex); return err; } /** * nfsd_nl_listener_get_doit - get the nfs running listeners * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. 
*/ int nfsd_nl_listener_get_doit(struct sk_buff *skb, struct genl_info *info) { struct svc_xprt *xprt; struct svc_serv *serv; struct nfsd_net *nn; void *hdr; int err; skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; hdr = genlmsg_iput(skb, info); if (!hdr) { err = -EMSGSIZE; goto err_free_msg; } mutex_lock(&nfsd_mutex); nn = net_generic(genl_info_net(info), nfsd_net_id); /* no nfs server? Just send empty socket list */ if (!nn->nfsd_serv) goto out_unlock_mtx; serv = nn->nfsd_serv; spin_lock_bh(&serv->sv_lock); list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { struct nlattr *attr; attr = nla_nest_start(skb, NFSD_A_SERVER_SOCK_ADDR); if (!attr) { err = -EINVAL; goto err_serv_unlock; } if (nla_put_string(skb, NFSD_A_SOCK_TRANSPORT_NAME, xprt->xpt_class->xcl_name) || nla_put(skb, NFSD_A_SOCK_ADDR, sizeof(struct sockaddr_storage), &xprt->xpt_local)) { err = -EINVAL; goto err_serv_unlock; } nla_nest_end(skb, attr); } spin_unlock_bh(&serv->sv_lock); out_unlock_mtx: mutex_unlock(&nfsd_mutex); genlmsg_end(skb, hdr); return genlmsg_reply(skb, info); err_serv_unlock: spin_unlock_bh(&serv->sv_lock); mutex_unlock(&nfsd_mutex); err_free_msg: nlmsg_free(skb); return err; } /** * nfsd_nl_pool_mode_set_doit - set the number of running threads * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. */ int nfsd_nl_pool_mode_set_doit(struct sk_buff *skb, struct genl_info *info) { const struct nlattr *attr; if (GENL_REQ_ATTR_CHECK(info, NFSD_A_POOL_MODE_MODE)) return -EINVAL; attr = info->attrs[NFSD_A_POOL_MODE_MODE]; return sunrpc_set_pool_mode(nla_data(attr)); } /** * nfsd_nl_pool_mode_get_doit - get info about pool_mode * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. */ int nfsd_nl_pool_mode_get_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); char buf[16]; void *hdr; int err; if (sunrpc_get_pool_mode(buf, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) return -ERANGE; skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; err = -EMSGSIZE; hdr = genlmsg_iput(skb, info); if (!hdr) goto err_free_msg; err = nla_put_string(skb, NFSD_A_POOL_MODE_MODE, buf) | nla_put_u32(skb, NFSD_A_POOL_MODE_NPOOLS, nfsd_nrpools(net)); if (err) goto err_free_msg; genlmsg_end(skb, hdr); return genlmsg_reply(skb, info); err_free_msg: nlmsg_free(skb); return err; } /** * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace * @net: a freshly-created network namespace * * This information stays around as long as the network namespace is * alive whether or not there is an NFSD instance running in the * namespace. * * Returns zero on success, or a negative errno otherwise. 
*/ static __net_init int nfsd_net_init(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); int retval; int i; retval = nfsd_export_init(net); if (retval) goto out_export_error; retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; retval = percpu_counter_init_many(nn->counter, 0, GFP_KERNEL, NFSD_STATS_COUNTERS_NUM); if (retval) goto out_repcache_error; memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats)); nn->nfsd_svcstats.program = &nfsd_programs[0]; if (!nfsd_proc_stat_init(net)) { retval = -ENOMEM; goto out_proc_error; } for (i = 0; i < sizeof(nn->nfsd_versions); i++) nn->nfsd_versions[i] = nfsd_support_version(i); for (i = 0; i < sizeof(nn->nfsd4_minorversions); i++) nn->nfsd4_minorversions[i] = nfsd_support_version(4); nn->nfsd_info.mutex = &nfsd_mutex; nn->nfsd_serv = NULL; nfsd4_init_leases_net(nn); get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); #if IS_ENABLED(CONFIG_NFS_LOCALIO) spin_lock_init(&nn->local_clients_lock); INIT_LIST_HEAD(&nn->local_clients); #endif return 0; out_proc_error: percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); out_repcache_error: nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); out_export_error: return retval; } #if IS_ENABLED(CONFIG_NFS_LOCALIO) /** * nfsd_net_pre_exit - Disconnect localio clients from net namespace * @net: a network namespace that is about to be destroyed * * This invalidates ->net pointers held by localio clients * while they can still safely access nn->counter. */ static __net_exit void nfsd_net_pre_exit(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfs_localio_invalidate_clients(&nn->local_clients, &nn->local_clients_lock); } #endif /** * nfsd_net_exit - Release the nfsd_net portion of a net namespace * @net: a network namespace that is about to be destroyed * */ static __net_exit void nfsd_net_exit(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfsd_proc_stat_shutdown(net); percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); } static struct pernet_operations nfsd_net_ops = { .init = nfsd_net_init, #if IS_ENABLED(CONFIG_NFS_LOCALIO) .pre_exit = nfsd_net_pre_exit, #endif .exit = nfsd_net_exit, .id = &nfsd_net_id, .size = sizeof(struct nfsd_net), }; static int __init init_nfsd(void) { int retval; nfsd_debugfs_init(); retval = nfsd4_init_slabs(); if (retval) return retval; retval = nfsd4_init_pnfs(); if (retval) goto out_free_slabs; retval = nfsd_drc_slab_create(); if (retval) goto out_free_pnfs; nfsd_lockd_init(); /* lockd->nfsd callbacks */ retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) goto out_free_lockd; retval = register_cld_notifier(); if (retval) goto out_free_subsys; retval = nfsd4_create_laundry_wq(); if (retval) goto out_free_cld; retval = register_filesystem(&nfsd_fs_type); if (retval) goto out_free_nfsd4; retval = genl_register_family(&nfsd_nl_family); if (retval) goto out_free_filesystem; retval = create_proc_exports_entry(); if (retval) goto out_free_all; nfsd_localio_ops_init(); return 0; out_free_all: genl_unregister_family(&nfsd_nl_family); out_free_filesystem: unregister_filesystem(&nfsd_fs_type); out_free_nfsd4: nfsd4_destroy_laundry_wq(); out_free_cld: unregister_cld_notifier(); out_free_subsys: unregister_pernet_subsys(&nfsd_net_ops); out_free_lockd: nfsd_lockd_shutdown(); nfsd_drc_slab_free(); out_free_pnfs: nfsd4_exit_pnfs(); out_free_slabs: 
nfsd4_free_slabs(); nfsd_debugfs_exit(); return retval; } static void __exit exit_nfsd(void) { remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); genl_unregister_family(&nfsd_nl_family); unregister_filesystem(&nfsd_fs_type); nfsd4_destroy_laundry_wq(); unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); nfsd_lockd_shutdown(); nfsd4_free_slabs(); nfsd4_exit_pnfs(); nfsd_debugfs_exit(); } MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); MODULE_DESCRIPTION("In-kernel NFS server"); MODULE_LICENSE("GPL"); module_init(init_nfsd) module_exit(exit_nfsd)
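init_nfsd() above acquires its resources one registration at a time and, on failure, unwinds only the steps that already succeeded, in reverse order; exit_nfsd() repeats the same teardown for the success case. Below is a minimal, self-contained userspace sketch of that goto-unwind idiom. The names and the malloc() calls are purely illustrative stand-ins for the real registration steps, not the nfsd API.

#include <stdio.h>
#include <stdlib.h>

/* Each setup step gets one unwind label; a failure jumps to the label that
 * releases exactly what has already been acquired, in reverse order. */
static int init_everything(void)
{
	void *slabs, *pernet, *fs;

	slabs = malloc(32);		/* stands in for nfsd4_init_slabs() */
	if (!slabs)
		goto out;
	pernet = malloc(32);		/* stands in for register_pernet_subsys() */
	if (!pernet)
		goto out_free_slabs;
	fs = malloc(32);		/* stands in for register_filesystem() */
	if (!fs)
		goto out_free_pernet;

	printf("all steps succeeded\n");
	/* Module exit would release these in the same reverse order. */
	free(fs);
	free(pernet);
	free(slabs);
	return 0;

out_free_pernet:
	free(pernet);
out_free_slabs:
	free(slabs);
out:
	return -1;
}

int main(void)
{
	return init_everything();
}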
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_PGTABLE_INVERT_H
#define _ASM_PGTABLE_INVERT_H 1

#ifndef __ASSEMBLER__

/*
 * A clear pte value is special, and doesn't get inverted.
 *
 * Note that even users that only pass a pgprot_t (rather
 * than a full pte) won't trigger the special zero case,
 * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED
 * set. So the all zero case really is limited to just the
 * cleared page table entry case.
 */
static inline bool __pte_needs_invert(u64 val)
{
	return val && !(val & _PAGE_PRESENT);
}

/* Get a mask to xor with the page table entry to get the correct pfn. */
static inline u64 protnone_mask(u64 val)
{
	return __pte_needs_invert(val) ? ~0ull : 0;
}

static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
{
	/*
	 * When a PTE transitions from NONE to !NONE or vice-versa
	 * invert the PFN part to stop speculation.
	 * pte_pfn undoes this when needed.
	 */
	if (__pte_needs_invert(oldval) != __pte_needs_invert(val))
		val = (val & ~mask) | (~val & mask);
	return val;
}

#endif /* __ASSEMBLER__ */
#endif
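flip_protnone_guard() above inverts the masked PFN bits exactly when an entry changes between present and not-present, and the same flip applied on the way back recovers the original value. The following self-contained userspace sketch demonstrates that round trip. The present bit and the PFN mask here are illustrative constants, not the real x86 PTE layout.

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

#define FAKE_PAGE_PRESENT 0x1ULL	/* stand-in for _PAGE_PRESENT */

static int needs_invert(uint64_t val)
{
	return val && !(val & FAKE_PAGE_PRESENT);
}

static uint64_t flip_guard(uint64_t oldval, uint64_t val, uint64_t mask)
{
	if (needs_invert(oldval) != needs_invert(val))
		val = (val & ~mask) | (~val & mask);	/* invert only the masked bits */
	return val;
}

int main(void)
{
	uint64_t pfn_mask = 0x000ffffffffff000ULL;	/* illustrative PFN field */
	uint64_t present  = 0x0000000012345000ULL | FAKE_PAGE_PRESENT;
	/* Present -> not-present: the PFN bits get inverted. */
	uint64_t none = flip_guard(present, present & ~FAKE_PAGE_PRESENT, pfn_mask);
	/* Not-present -> present: the same flip restores the original PFN. */
	uint64_t back = flip_guard(none, none | FAKE_PAGE_PRESENT, pfn_mask);

	printf("present=%#llx none=%#llx back=%#llx\n",
	       (unsigned long long)present, (unsigned long long)none,
	       (unsigned long long)back);
	assert(back == present);
	return 0;
}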
6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 // SPDX-License-Identifier: GPL-2.0-only /* xfrm4_tunnel.c: Generic IP tunnel transformer. * * Copyright (C) 2003 David S. Miller (davem@redhat.com) */ #define pr_fmt(fmt) "IPsec: " fmt #include <linux/skbuff.h> #include <linux/module.h> #include <net/xfrm.h> #include <net/protocol.h> static int ipip_output(struct xfrm_state *x, struct sk_buff *skb) { skb_push(skb, -skb_network_offset(skb)); return 0; } static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) { return ip_hdr(skb)->protocol; } static int ipip_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { if (x->props.mode != XFRM_MODE_TUNNEL) { NL_SET_ERR_MSG(extack, "IPv4 tunnel can only be used with tunnel mode"); return -EINVAL; } if (x->encap) { NL_SET_ERR_MSG(extack, "IPv4 tunnel is not compatible with encapsulation"); return -EINVAL; } x->props.header_len = sizeof(struct iphdr); return 0; } static void ipip_destroy(struct xfrm_state *x) { } static const struct xfrm_type ipip_type = { .owner = THIS_MODULE, .proto = IPPROTO_IPIP, .init_state = ipip_init_state, .destructor = ipip_destroy, .input = ipip_xfrm_rcv, .output = ipip_output }; static int xfrm_tunnel_rcv(struct sk_buff *skb) { return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr); } static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) { return -ENOENT; } static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = { .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, .priority = 4, }; #if IS_ENABLED(CONFIG_IPV6) static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = { .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, .priority = 3, }; #endif static int __init ipip_init(void) { if (xfrm_register_type(&ipip_type, AF_INET) < 0) { pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (xfrm4_tunnel_register(&xfrm_tunnel_handler, AF_INET)) { pr_info("%s: can't add xfrm handler for AF_INET\n", __func__); xfrm_unregister_type(&ipip_type, AF_INET); return -EAGAIN; } #if IS_ENABLED(CONFIG_IPV6) if (xfrm4_tunnel_register(&xfrm64_tunnel_handler, AF_INET6)) { pr_info("%s: can't add xfrm handler for AF_INET6\n", __func__); xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET); xfrm_unregister_type(&ipip_type, AF_INET); return -EAGAIN; } #endif return 0; } static void __exit ipip_fini(void) { #if IS_ENABLED(CONFIG_IPV6) if (xfrm4_tunnel_deregister(&xfrm64_tunnel_handler, AF_INET6)) pr_info("%s: can't remove xfrm handler for AF_INET6\n", __func__); #endif if (xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET)) pr_info("%s: can't remove xfrm handler for AF_INET\n", __func__); xfrm_unregister_type(&ipip_type, AF_INET); } module_init(ipip_init); module_exit(ipip_fini); MODULE_DESCRIPTION("IPv4 XFRM tunnel driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP);
5 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 /* Protective Load Balancing (PLB) * * PLB was designed to reduce link load imbalance across datacenter * switches. PLB is a host-based optimization; it leverages congestion * signals from the transport layer to randomly change the path of the * connection experiencing sustained congestion. PLB prefers to repath * after idle periods to minimize packet reordering. It repaths by * changing the IPv6 Flow Label on the packets of a connection, which * datacenter switches include as part of ECMP/WCMP hashing. * * PLB is described in detail in: * * Mubashir Adnan Qureshi, Yuchung Cheng, Qianwen Yin, Qiaobin Fu, * Gautam Kumar, Masoud Moshref, Junhua Yan, Van Jacobson, * David Wetherall,Abdul Kabbani: * "PLB: Congestion Signals are Simple and Effective for * Network Load Balancing" * In ACM SIGCOMM 2022, Amsterdam Netherlands. * */ #include <net/tcp.h> /* Called once per round-trip to update PLB state for a connection. */ void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb, const int cong_ratio) { struct net *net = sock_net(sk); if (!READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled)) return; if (cong_ratio >= 0) { if (cong_ratio < READ_ONCE(net->ipv4.sysctl_tcp_plb_cong_thresh)) plb->consec_cong_rounds = 0; else if (plb->consec_cong_rounds < READ_ONCE(net->ipv4.sysctl_tcp_plb_rehash_rounds)) plb->consec_cong_rounds++; } } EXPORT_SYMBOL_GPL(tcp_plb_update_state); /* Check whether recent congestion has been persistent enough to warrant * a load balancing decision that switches the connection to another path. */ void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb) { struct net *net = sock_net(sk); u32 max_suspend; bool forced_rehash = false, idle_rehash = false; if (!READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled)) return; forced_rehash = plb->consec_cong_rounds >= READ_ONCE(net->ipv4.sysctl_tcp_plb_rehash_rounds); /* If sender goes idle then we check whether to rehash. */ idle_rehash = READ_ONCE(net->ipv4.sysctl_tcp_plb_idle_rehash_rounds) && !tcp_sk(sk)->packets_out && plb->consec_cong_rounds >= READ_ONCE(net->ipv4.sysctl_tcp_plb_idle_rehash_rounds); if (!forced_rehash && !idle_rehash) return; /* Note that tcp_jiffies32 can wrap; we detect wraps by checking for * cases where the max suspension end is before the actual suspension * end. We clear pause_until to 0 to indicate there is no recent * RTO event that constrains PLB rehashing. */ max_suspend = 2 * READ_ONCE(net->ipv4.sysctl_tcp_plb_suspend_rto_sec) * HZ; if (plb->pause_until && (!before(tcp_jiffies32, plb->pause_until) || before(tcp_jiffies32 + max_suspend, plb->pause_until))) plb->pause_until = 0; if (plb->pause_until) return; sk_rethink_txhash(sk); plb->consec_cong_rounds = 0; tcp_sk(sk)->plb_rehash++; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPLBREHASH); } EXPORT_SYMBOL_GPL(tcp_plb_check_rehash); /* Upon RTO, disallow load balancing for a while, to avoid having load * balancing decisions switch traffic to a black-holed path that was * previously avoided with a sk_rethink_txhash() call at RTO time. 
*/ void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb) { struct net *net = sock_net(sk); u32 pause; if (!READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled)) return; pause = READ_ONCE(net->ipv4.sysctl_tcp_plb_suspend_rto_sec) * HZ; pause += get_random_u32_below(pause); plb->pause_until = tcp_jiffies32 + pause; /* Reset PLB state upon RTO, since an RTO causes a sk_rethink_txhash() call * that may switch this connection to a path with completely different * congestion characteristics. */ plb->consec_cong_rounds = 0; } EXPORT_SYMBOL_GPL(tcp_plb_update_state_upon_rto);
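The PLB code above leans on two small tricks: the RTO pause is randomized to land in [suspend, 2*suspend) so many connections do not rehash in lockstep, and the expiry test uses a wraparound-safe comparison of 32-bit tick values. A minimal userspace sketch of both follows; the HZ value and the suspend knob are assumptions, rand() stands in for get_random_u32_below(), and before() mirrors the kernel's time_before()-style signed-difference idiom.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Wraparound-safe "is a earlier than b" for 32-bit tick counters: correct
 * as long as the two values are less than 2^31 ticks apart. */
static int before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

int main(void)
{
	uint32_t hz = 1000;			/* assumed ticks per second */
	uint32_t suspend_sec = 30;		/* stand-in for the sysctl knob */
	uint32_t base = suspend_sec * hz;
	uint32_t now = 0xFFFFF000u;		/* tick value close to wrapping */
	uint32_t pause, pause_until;

	srand((unsigned)time(NULL));
	pause = base + (uint32_t)rand() % base;	/* roughly uniform in [base, 2*base) */
	pause_until = now + pause;		/* may wrap past zero; that is fine */

	printf("pause %u ticks, until tick %u\n", pause, pause_until);
	printf("expired now?        %s\n", before(now, pause_until) ? "no" : "yes");
	printf("expired much later? %s\n",
	       before(now + 3 * base, pause_until) ? "no" : "yes");
	return 0;
}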
29 285 1 1 372 373 485 76 2 173 2 41 480 8 482 3 16 503 503 29 28 24 1 28 10 480 479 480 479 480 16 12 474 111 26 1 6 68 2 57 92 56 32 462 47 462 257 2 37 37 71 118 5 114 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 // SPDX-License-Identifier: GPL-2.0 #include <linux/mm.h> #include <linux/rmap.h> #include <linux/hugetlb.h> #include <linux/swap.h> #include <linux/swapops.h> #include "internal.h" static inline bool not_found(struct page_vma_mapped_walk *pvmw) { page_vma_mapped_walk_done(pvmw); return false; } static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp, spinlock_t **ptlp) { pte_t ptent; if (pvmw->flags & PVMW_SYNC) { /* Use the stricter lookup */ pvmw->pte = pte_offset_map_lock(pvmw->vma->vm_mm, pvmw->pmd, pvmw->address, &pvmw->ptl); *ptlp = pvmw->ptl; return !!pvmw->pte; } again: /* * It is important to return the ptl corresponding to pte, * in case *pvmw->pmd changes underneath us; so we need to * return it even when choosing not to lock, in case caller * proceeds to loop over next ptes, and finds a match later. * Though, in most cases, page lock already protects this. */ pvmw->pte = pte_offset_map_rw_nolock(pvmw->vma->vm_mm, pvmw->pmd, pvmw->address, pmdvalp, ptlp); if (!pvmw->pte) return false; ptent = ptep_get(pvmw->pte); if (pvmw->flags & PVMW_MIGRATION) { if (!is_swap_pte(ptent)) return false; } else if (is_swap_pte(ptent)) { swp_entry_t entry; /* * Handle un-addressable ZONE_DEVICE memory. * * We get here when we are trying to unmap a private * device page from the process address space. Such * page is not CPU accessible and thus is mapped as * a special swap entry, nonetheless it still does * count as a valid regular mapping for the page * (and is accounted as such in page maps count). * * So handle this special case as if it was a normal * page mapping ie lock CPU page table and return true. * * For more details on device private memory see HMM * (include/linux/hmm.h or mm/hmm.c). 
*/ entry = pte_to_swp_entry(ptent); if (!is_device_private_entry(entry) && !is_device_exclusive_entry(entry)) return false; } else if (!pte_present(ptent)) { return false; } spin_lock(*ptlp); if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) { pte_unmap_unlock(pvmw->pte, *ptlp); goto again; } pvmw->ptl = *ptlp; return true; } /** * check_pte - check if [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) is * mapped at the @pvmw->pte * @pvmw: page_vma_mapped_walk struct, includes a pair pte and pfn range * for checking * @pte_nr: the number of small pages described by @pvmw->pte. * * page_vma_mapped_walk() found a place where pfn range is *potentially* * mapped. check_pte() has to validate this. * * pvmw->pte may point to empty PTE, swap PTE or PTE pointing to * arbitrary page. * * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration * entry that points to [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) * * If PVMW_MIGRATION flag is not set, returns true if pvmw->pte points to * [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) * * Otherwise, return false. * */ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr) { unsigned long pfn; pte_t ptent = ptep_get(pvmw->pte); if (pvmw->flags & PVMW_MIGRATION) { swp_entry_t entry; if (!is_swap_pte(ptent)) return false; entry = pte_to_swp_entry(ptent); if (!is_migration_entry(entry)) return false; pfn = swp_offset_pfn(entry); } else if (is_swap_pte(ptent)) { swp_entry_t entry; /* Handle un-addressable ZONE_DEVICE memory */ entry = pte_to_swp_entry(ptent); if (!is_device_private_entry(entry) && !is_device_exclusive_entry(entry)) return false; pfn = swp_offset_pfn(entry); } else { if (!pte_present(ptent)) return false; pfn = pte_pfn(ptent); } if ((pfn + pte_nr - 1) < pvmw->pfn) return false; if (pfn > (pvmw->pfn + pvmw->nr_pages - 1)) return false; return true; } /* Returns true if the two ranges overlap. Careful to not overflow. */ static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw) { if ((pfn + HPAGE_PMD_NR - 1) < pvmw->pfn) return false; if (pfn > pvmw->pfn + pvmw->nr_pages - 1) return false; return true; } static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size) { pvmw->address = (pvmw->address + size) & ~(size - 1); if (!pvmw->address) pvmw->address = ULONG_MAX; } /** * page_vma_mapped_walk - check if @pvmw->pfn is mapped in @pvmw->vma at * @pvmw->address * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags * must be set. pmd, pte and ptl must be NULL. * * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is * adjusted if needed (for PTE-mapped THPs). * * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in * a loop to find all PTEs that map the THP. * * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry * regardless of which page table level the page is mapped at. @pvmw->pmd is * NULL. * * Returns false if there are no more page table entries for the page in * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped. * * If you need to stop the walk before page_vma_mapped_walk() returned false, * use page_vma_mapped_walk_done(). It will do the housekeeping. 
*/ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) { struct vm_area_struct *vma = pvmw->vma; struct mm_struct *mm = vma->vm_mm; unsigned long end; spinlock_t *ptl; pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t pmde; /* The only possible pmd mapping has been handled on last iteration */ if (pvmw->pmd && !pvmw->pte) return not_found(pvmw); if (unlikely(is_vm_hugetlb_page(vma))) { struct hstate *hstate = hstate_vma(vma); unsigned long size = huge_page_size(hstate); /* The only possible mapping was handled on last iteration */ if (pvmw->pte) return not_found(pvmw); /* * All callers that get here will already hold the * i_mmap_rwsem. Therefore, no additional locks need to be * taken before calling hugetlb_walk(). */ pvmw->pte = hugetlb_walk(vma, pvmw->address, size); if (!pvmw->pte) return false; pvmw->ptl = huge_pte_lock(hstate, mm, pvmw->pte); if (!check_pte(pvmw, pages_per_huge_page(hstate))) return not_found(pvmw); return true; } end = vma_address_end(pvmw); if (pvmw->pte) goto next_pte; restart: do { pgd = pgd_offset(mm, pvmw->address); if (!pgd_present(*pgd)) { step_forward(pvmw, PGDIR_SIZE); continue; } p4d = p4d_offset(pgd, pvmw->address); if (!p4d_present(*p4d)) { step_forward(pvmw, P4D_SIZE); continue; } pud = pud_offset(p4d, pvmw->address); if (!pud_present(*pud)) { step_forward(pvmw, PUD_SIZE); continue; } pvmw->pmd = pmd_offset(pud, pvmw->address); /* * Make sure the pmd value isn't cached in a register by the * compiler and used as a stale value after we've observed a * subsequent update. */ pmde = pmdp_get_lockless(pvmw->pmd); if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde) || (pmd_present(pmde) && pmd_devmap(pmde))) { pvmw->ptl = pmd_lock(mm, pvmw->pmd); pmde = *pvmw->pmd; if (!pmd_present(pmde)) { swp_entry_t entry; if (!thp_migration_supported() || !(pvmw->flags & PVMW_MIGRATION)) return not_found(pvmw); entry = pmd_to_swp_entry(pmde); if (!is_migration_entry(entry) || !check_pmd(swp_offset_pfn(entry), pvmw)) return not_found(pvmw); return true; } if (likely(pmd_trans_huge(pmde) || pmd_devmap(pmde))) { if (pvmw->flags & PVMW_MIGRATION) return not_found(pvmw); if (!check_pmd(pmd_pfn(pmde), pvmw)) return not_found(pvmw); return true; } /* THP pmd was split under us: handle on pte level */ spin_unlock(pvmw->ptl); pvmw->ptl = NULL; } else if (!pmd_present(pmde)) { /* * If PVMW_SYNC, take and drop THP pmd lock so that we * cannot return prematurely, while zap_huge_pmd() has * cleared *pmd but not decremented compound_mapcount(). */ if ((pvmw->flags & PVMW_SYNC) && thp_vma_suitable_order(vma, pvmw->address, PMD_ORDER) && (pvmw->nr_pages >= HPAGE_PMD_NR)) { spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); spin_unlock(ptl); } step_forward(pvmw, PMD_SIZE); continue; } if (!map_pte(pvmw, &pmde, &ptl)) { if (!pvmw->pte) goto restart; goto next_pte; } this_pte: if (check_pte(pvmw, 1)) return true; next_pte: do { pvmw->address += PAGE_SIZE; if (pvmw->address >= end) return not_found(pvmw); /* Did we cross page table boundary? 
*/ if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) { if (pvmw->ptl) { spin_unlock(pvmw->ptl); pvmw->ptl = NULL; } pte_unmap(pvmw->pte); pvmw->pte = NULL; goto restart; } pvmw->pte++; } while (pte_none(ptep_get(pvmw->pte))); if (!pvmw->ptl) { spin_lock(ptl); if (unlikely(!pmd_same(pmde, pmdp_get_lockless(pvmw->pmd)))) { pte_unmap_unlock(pvmw->pte, ptl); pvmw->pte = NULL; goto restart; } pvmw->ptl = ptl; } goto this_pte; } while (pvmw->address < end); return false; } #ifdef CONFIG_MEMORY_FAILURE /** * page_mapped_in_vma - check whether a page is really mapped in a VMA * @page: the page to test * @vma: the VMA to test * * Return: The address the page is mapped at if the page is in the range * covered by the VMA and present in the page table. If the page is * outside the VMA or not present, returns -EFAULT. * Only valid for normal file or anonymous VMAs. */ unsigned long page_mapped_in_vma(const struct page *page, struct vm_area_struct *vma) { const struct folio *folio = page_folio(page); struct page_vma_mapped_walk pvmw = { .pfn = page_to_pfn(page), .nr_pages = 1, .vma = vma, .flags = PVMW_SYNC, }; pvmw.address = vma_address(vma, page_pgoff(folio, page), 1); if (pvmw.address == -EFAULT) goto out; if (!page_vma_mapped_walk(&pvmw)) return -EFAULT; page_vma_mapped_walk_done(&pvmw); out: return pvmw.address; } #endif
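Two small helpers carry much of the walk above: the check_pte()/check_pmd() overlap test, written so it cannot overflow at the top of the pfn space, and step_forward(), which rounds the address up to the next table boundary and saturates when the addition wraps. This is a self-contained userspace sketch of both, with an illustrative 2 MiB step standing in for PMD_SIZE.

#include <stdio.h>
#include <stdbool.h>
#include <limits.h>

/* Overlap test in the style of check_pte()/check_pmd(): true iff the
 * mapping [pfn, pfn + nr) intersects [target, target + target_nr). */
static bool ranges_overlap(unsigned long pfn, unsigned long nr,
			   unsigned long target, unsigned long target_nr)
{
	if (pfn + nr - 1 < target)		/* mapping ends before the target */
		return false;
	if (pfn > target + target_nr - 1)	/* mapping starts after the target */
		return false;
	return true;
}

/* Same rounding as step_forward(): jump to the next size-aligned boundary
 * (size must be a power of two), saturating at ULONG_MAX on wrap so the
 * caller's "while (address < end)" loop terminates. */
static unsigned long step_forward(unsigned long address, unsigned long size)
{
	address = (address + size) & ~(size - 1);
	if (!address)
		address = ULONG_MAX;
	return address;
}

int main(void)
{
	unsigned long pmd_span = 2UL << 20;	/* 2 MiB, a typical PMD-sized step */

	printf("%d %d\n",
	       ranges_overlap(611, 1, 100, 512),	/* 1: last pfn of the range */
	       ranges_overlap(612, 1, 100, 512));	/* 0: just past the range   */
	printf("%#lx\n", step_forward(0x1234000UL, pmd_span));	/* 0x1400000 */
	printf("%#lx\n", step_forward(ULONG_MAX - 4096, pmd_span));
	return 0;
}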
13 13 8 8 11 3 8 8 8 8 8 1 10 11 3 11 11 11 1 1 10 1 11 11 11 1 11 11 11 11 11 8 11 8 8 12 1 3 8 13 1 12 11 1 1 1 11 13 12 13 11 11 1 10 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 // SPDX-License-Identifier: GPL-2.0 /* Copyright 2011-2014 Autronica Fire and Security AS * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * * Frame router for HSR and PRP. 
*/ #include "hsr_forward.h" #include <linux/types.h> #include <linux/skbuff.h> #include <linux/etherdevice.h> #include <linux/if_vlan.h> #include "hsr_main.h" #include "hsr_framereg.h" struct hsr_node; /* The uses I can see for these HSR supervision frames are: * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type = * 22") to reset any sequence_nr counters belonging to that node. Useful if * the other node's counter has been reset for some reason. * -- * Or not - resetting the counter and bridging the frame would create a * loop, unfortunately. * * 2) Use the LifeCheck frames to detect ring breaks. I.e. if no LifeCheck * frame is received from a particular node, we know something is wrong. * We just register these (as with normal frames) and throw them away. * * 3) Allow different MAC addresses for the two slave interfaces, using the * MacAddressA field. */ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) { struct ethhdr *eth_hdr; struct hsr_sup_tag *hsr_sup_tag; struct hsrv1_ethhdr_sp *hsr_V1_hdr; struct hsr_sup_tlv *hsr_sup_tlv; u16 total_length = 0; WARN_ON_ONCE(!skb_mac_header_was_set(skb)); eth_hdr = (struct ethhdr *)skb_mac_header(skb); /* Correct addr? */ if (!ether_addr_equal(eth_hdr->h_dest, hsr->sup_multicast_addr)) return false; /* Correct ether type?. */ if (!(eth_hdr->h_proto == htons(ETH_P_PRP) || eth_hdr->h_proto == htons(ETH_P_HSR))) return false; /* Get the supervision header from correct location. */ if (eth_hdr->h_proto == htons(ETH_P_HSR)) { /* Okay HSRv1. */ total_length = sizeof(struct hsrv1_ethhdr_sp); if (!pskb_may_pull(skb, total_length)) return false; hsr_V1_hdr = (struct hsrv1_ethhdr_sp *)skb_mac_header(skb); if (hsr_V1_hdr->hsr.encap_proto != htons(ETH_P_PRP)) return false; hsr_sup_tag = &hsr_V1_hdr->hsr_sup; } else { total_length = sizeof(struct hsrv0_ethhdr_sp); if (!pskb_may_pull(skb, total_length)) return false; hsr_sup_tag = &((struct hsrv0_ethhdr_sp *)skb_mac_header(skb))->hsr_sup; } if (hsr_sup_tag->tlv.HSR_TLV_type != HSR_TLV_ANNOUNCE && hsr_sup_tag->tlv.HSR_TLV_type != HSR_TLV_LIFE_CHECK && hsr_sup_tag->tlv.HSR_TLV_type != PRP_TLV_LIFE_CHECK_DD && hsr_sup_tag->tlv.HSR_TLV_type != PRP_TLV_LIFE_CHECK_DA) return false; if (hsr_sup_tag->tlv.HSR_TLV_length != 12 && hsr_sup_tag->tlv.HSR_TLV_length != sizeof(struct hsr_sup_payload)) return false; /* Get next tlv */ total_length += hsr_sup_tag->tlv.HSR_TLV_length; if (!pskb_may_pull(skb, total_length)) return false; skb_pull(skb, total_length); hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data; skb_push(skb, total_length); /* if this is a redbox supervision frame we need to verify * that more data is available */ if (hsr_sup_tlv->HSR_TLV_type == PRP_TLV_REDBOX_MAC) { /* tlv length must be a length of a mac address */ if (hsr_sup_tlv->HSR_TLV_length != sizeof(struct hsr_sup_payload)) return false; /* make sure another tlv follows */ total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tlv->HSR_TLV_length; if (!pskb_may_pull(skb, total_length)) return false; /* get next tlv */ skb_pull(skb, total_length); hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data; skb_push(skb, total_length); } /* end of tlvs must follow at the end */ if (hsr_sup_tlv->HSR_TLV_type == HSR_TLV_EOT && hsr_sup_tlv->HSR_TLV_length != 0) return false; return true; } static bool is_proxy_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) { struct hsr_sup_payload *payload; struct ethhdr *eth_hdr; u16 total_length = 0; eth_hdr = (struct ethhdr *)skb_mac_header(skb); /* Get the HSR protocol 
revision. */ if (eth_hdr->h_proto == htons(ETH_P_HSR)) total_length = sizeof(struct hsrv1_ethhdr_sp); else total_length = sizeof(struct hsrv0_ethhdr_sp); if (!pskb_may_pull(skb, total_length + sizeof(struct hsr_sup_payload))) return false; skb_pull(skb, total_length); payload = (struct hsr_sup_payload *)skb->data; skb_push(skb, total_length); /* For RedBox (HSR-SAN) check if we have received the supervision * frame with MAC addresses from own ProxyNodeTable. */ return hsr_is_node_in_db(&hsr->proxy_node_db, payload->macaddress_A); } static struct sk_buff *create_stripped_skb_hsr(struct sk_buff *skb_in, struct hsr_frame_info *frame) { struct sk_buff *skb; int copylen; unsigned char *dst, *src; skb_pull(skb_in, HSR_HLEN); skb = __pskb_copy(skb_in, skb_headroom(skb_in) - HSR_HLEN, GFP_ATOMIC); skb_push(skb_in, HSR_HLEN); if (!skb) return NULL; skb_reset_mac_header(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start -= HSR_HLEN; copylen = 2 * ETH_ALEN; if (frame->is_vlan) copylen += VLAN_HLEN; src = skb_mac_header(skb_in); dst = skb_mac_header(skb); memcpy(dst, src, copylen); skb->protocol = eth_hdr(skb)->h_proto; return skb; } struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame, struct hsr_port *port) { if (!frame->skb_std) { if (frame->skb_hsr) frame->skb_std = create_stripped_skb_hsr(frame->skb_hsr, frame); else netdev_warn_once(port->dev, "Unexpected frame received in hsr_get_untagged_frame()\n"); if (!frame->skb_std) return NULL; } return skb_clone(frame->skb_std, GFP_ATOMIC); } struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame, struct hsr_port *port) { if (!frame->skb_std) { if (frame->skb_prp) { /* trim the skb by len - HSR_HLEN to exclude RCT */ skb_trim(frame->skb_prp, frame->skb_prp->len - HSR_HLEN); frame->skb_std = __pskb_copy(frame->skb_prp, skb_headroom(frame->skb_prp), GFP_ATOMIC); } else { /* Unexpected */ WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n", __FILE__, __LINE__, port->dev->name); return NULL; } } return skb_clone(frame->skb_std, GFP_ATOMIC); } static void prp_set_lan_id(struct prp_rct *trailer, struct hsr_port *port) { int lane_id; if (port->type == HSR_PT_SLAVE_A) lane_id = 0; else lane_id = 1; /* Add net_id in the upper 3 bits of lane_id */ lane_id |= port->hsr->net_id; set_prp_lan_id(trailer, lane_id); } /* Tailroom for PRP rct should have been created before calling this */ static struct sk_buff *prp_fill_rct(struct sk_buff *skb, struct hsr_frame_info *frame, struct hsr_port *port) { struct prp_rct *trailer; int min_size = ETH_ZLEN; int lsdu_size; if (!skb) return skb; if (frame->is_vlan) min_size = VLAN_ETH_ZLEN; if (skb_put_padto(skb, min_size)) return NULL; trailer = (struct prp_rct *)skb_put(skb, HSR_HLEN); lsdu_size = skb->len - 14; if (frame->is_vlan) lsdu_size -= 4; prp_set_lan_id(trailer, port); set_prp_LSDU_size(trailer, lsdu_size); trailer->sequence_nr = htons(frame->sequence_nr); trailer->PRP_suffix = htons(ETH_P_PRP); skb->protocol = eth_hdr(skb)->h_proto; return skb; } static void hsr_set_path_id(struct hsr_ethhdr *hsr_ethhdr, struct hsr_port *port) { int path_id; if (port->type == HSR_PT_SLAVE_A) path_id = 0; else path_id = 1; set_hsr_tag_path(&hsr_ethhdr->hsr_tag, path_id); } static struct sk_buff *hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, struct hsr_port *port, u8 proto_version) { struct hsr_ethhdr *hsr_ethhdr; unsigned char *pc; int lsdu_size; /* pad to minimum packet size which is 60 + 6 (HSR tag) */ if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN)) return NULL; 
lsdu_size = skb->len - 14; if (frame->is_vlan) lsdu_size -= 4; pc = skb_mac_header(skb); if (frame->is_vlan) /* This 4-byte shift (size of a vlan tag) does not * mean that the ethhdr starts there. But rather it * provides the proper environment for accessing * the fields, such as hsr_tag etc., just like * when the vlan tag is not there. This is because * the hsr tag is after the vlan tag. */ hsr_ethhdr = (struct hsr_ethhdr *)(pc + VLAN_HLEN); else hsr_ethhdr = (struct hsr_ethhdr *)pc; hsr_set_path_id(hsr_ethhdr, port); set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr); hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; hsr_ethhdr->ethhdr.h_proto = htons(proto_version ? ETH_P_HSR : ETH_P_PRP); skb->protocol = hsr_ethhdr->ethhdr.h_proto; return skb; } /* If the original frame was an HSR tagged frame, just clone it to be sent * unchanged. Otherwise, create a private frame especially tagged for 'port'. */ struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame, struct hsr_port *port) { unsigned char *dst, *src; struct sk_buff *skb; int movelen; if (frame->skb_hsr) { struct hsr_ethhdr *hsr_ethhdr = (struct hsr_ethhdr *)skb_mac_header(frame->skb_hsr); /* set the lane id properly */ hsr_set_path_id(hsr_ethhdr, port); return skb_clone(frame->skb_hsr, GFP_ATOMIC); } else if (port->dev->features & NETIF_F_HW_HSR_TAG_INS) { return skb_clone(frame->skb_std, GFP_ATOMIC); } /* Create the new skb with enough headroom to fit the HSR tag */ skb = __pskb_copy(frame->skb_std, skb_headroom(frame->skb_std) + HSR_HLEN, GFP_ATOMIC); if (!skb) return NULL; skb_reset_mac_header(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += HSR_HLEN; movelen = ETH_HLEN; if (frame->is_vlan) movelen += VLAN_HLEN; src = skb_mac_header(skb); dst = skb_push(skb, HSR_HLEN); memmove(dst, src, movelen); skb_reset_mac_header(skb); /* skb_put_padto free skb on error and hsr_fill_tag returns NULL in * that case */ return hsr_fill_tag(skb, frame, port, port->hsr->prot_version); } struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame, struct hsr_port *port) { struct sk_buff *skb; if (frame->skb_prp) { struct prp_rct *trailer = skb_get_PRP_rct(frame->skb_prp); if (trailer) { prp_set_lan_id(trailer, port); } else { WARN_ONCE(!trailer, "errored PRP skb"); return NULL; } return skb_clone(frame->skb_prp, GFP_ATOMIC); } else if (port->dev->features & NETIF_F_HW_HSR_TAG_INS) { return skb_clone(frame->skb_std, GFP_ATOMIC); } skb = skb_copy_expand(frame->skb_std, skb_headroom(frame->skb_std), skb_tailroom(frame->skb_std) + HSR_HLEN, GFP_ATOMIC); return prp_fill_rct(skb, frame, port); } static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, struct hsr_node *node_src) { bool was_multicast_frame; int res, recv_len; was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST); hsr_addr_subst_source(node_src, skb); skb_pull(skb, ETH_HLEN); recv_len = skb->len; res = netif_rx(skb); if (res == NET_RX_DROP) { dev->stats.rx_dropped++; } else { dev->stats.rx_packets++; dev->stats.rx_bytes += recv_len; if (was_multicast_frame) dev->stats.multicast++; } } static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port, struct hsr_frame_info *frame) { if (frame->port_rcv->type == HSR_PT_MASTER) { hsr_addr_subst_dest(frame->node_src, skb, port); /* Address substitution (IEC62439-3 pp 26, 50): replace mac * address of outgoing frame with that of the outgoing slave's. 
*/ ether_addr_copy(eth_hdr(skb)->h_source, port->dev->dev_addr); } /* When HSR node is used as RedBox - the frame received from HSR ring * requires source MAC address (SA) replacement to one which can be * recognized by SAN devices (otherwise, frames are dropped by switch) */ if (port->type == HSR_PT_INTERLINK) ether_addr_copy(eth_hdr(skb)->h_source, port->hsr->macaddress_redbox); return dev_queue_xmit(skb); } bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port) { return ((frame->port_rcv->type == HSR_PT_SLAVE_A && port->type == HSR_PT_SLAVE_B) || (frame->port_rcv->type == HSR_PT_SLAVE_B && port->type == HSR_PT_SLAVE_A)); } bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port) { struct sk_buff *skb; if (port->dev->features & NETIF_F_HW_HSR_FWD) return prp_drop_frame(frame, port); /* RedBox specific frames dropping policies * * Do not send HSR supervisory frames to SAN devices */ if (frame->is_supervision && port->type == HSR_PT_INTERLINK) return true; /* Do not forward to other HSR port (A or B) unicast frames which * are addressed to interlink port (and are in the ProxyNodeTable). */ skb = frame->skb_hsr; if (skb && prp_drop_frame(frame, port) && is_unicast_ether_addr(eth_hdr(skb)->h_dest) && hsr_is_node_in_db(&port->hsr->proxy_node_db, eth_hdr(skb)->h_dest)) { return true; } /* Do not forward to port C (Interlink) frames from nodes A and B * if DA is in NodeTable. */ if ((frame->port_rcv->type == HSR_PT_SLAVE_A || frame->port_rcv->type == HSR_PT_SLAVE_B) && port->type == HSR_PT_INTERLINK) { skb = frame->skb_hsr; if (skb && is_unicast_ether_addr(eth_hdr(skb)->h_dest) && hsr_is_node_in_db(&port->hsr->node_db, eth_hdr(skb)->h_dest)) { return true; } } /* Do not forward to port A and B unicast frames received on the * interlink port if it is addressed to one of nodes registered in * the ProxyNodeTable. */ if ((port->type == HSR_PT_SLAVE_A || port->type == HSR_PT_SLAVE_B) && frame->port_rcv->type == HSR_PT_INTERLINK) { skb = frame->skb_std; if (skb && is_unicast_ether_addr(eth_hdr(skb)->h_dest) && hsr_is_node_in_db(&port->hsr->proxy_node_db, eth_hdr(skb)->h_dest)) { return true; } } return false; } /* Forward the frame through all devices except: * - Back through the receiving device * - If it's a HSR frame: through a device where it has passed before * - if it's a PRP frame: through another PRP slave device (no bridge) * - To the local HSR master only if the frame is directly addressed to it, or * a non-supervision multicast or broadcast frame. * * HSR slave devices should insert a HSR tag into the frame, or forward the * frame unchanged if it's already tagged. Interlink devices should strip HSR * tags if they're of the non-HSR type (but only after duplicate discard). The * master device always strips HSR tags. */ static void hsr_forward_do(struct hsr_frame_info *frame) { struct hsr_port *port; struct sk_buff *skb; bool sent = false; hsr_for_each_port(frame->port_rcv->hsr, port) { struct hsr_priv *hsr = port->hsr; /* Don't send frame back the way it came */ if (port == frame->port_rcv) continue; /* Don't deliver locally unless we should */ if (port->type == HSR_PT_MASTER && !frame->is_local_dest) continue; /* Deliver frames directly addressed to us to master only */ if (port->type != HSR_PT_MASTER && frame->is_local_exclusive) continue; /* If hardware duplicate generation is enabled, only send out * one port. */ if ((port->dev->features & NETIF_F_HW_HSR_DUP) && sent) continue; /* Don't send frame over port where it has been sent before. 
* Also for SAN, this shouldn't be done. */ if (!frame->is_from_san && hsr->proto_ops->register_frame_out && hsr->proto_ops->register_frame_out(port, frame)) continue; if (frame->is_supervision && port->type == HSR_PT_MASTER && !frame->is_proxy_supervision) { hsr_handle_sup_frame(frame); continue; } /* Check if frame is to be dropped. Eg. for PRP no forward * between ports, or sending HSR supervision to RedBox. */ if (hsr->proto_ops->drop_frame && hsr->proto_ops->drop_frame(frame, port)) continue; if (port->type == HSR_PT_SLAVE_A || port->type == HSR_PT_SLAVE_B) skb = hsr->proto_ops->create_tagged_frame(frame, port); else skb = hsr->proto_ops->get_untagged_frame(frame, port); if (!skb) { frame->port_rcv->dev->stats.rx_dropped++; continue; } skb->dev = port->dev; if (port->type == HSR_PT_MASTER) { hsr_deliver_master(skb, port->dev, frame->node_src); } else { if (!hsr_xmit(skb, port, frame)) if (port->type == HSR_PT_SLAVE_A || port->type == HSR_PT_SLAVE_B) sent = true; } } } static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb, struct hsr_frame_info *frame) { if (hsr_addr_is_self(hsr, eth_hdr(skb)->h_dest)) { frame->is_local_exclusive = true; skb->pkt_type = PACKET_HOST; } else { frame->is_local_exclusive = false; } if (skb->pkt_type == PACKET_HOST || skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST) { frame->is_local_dest = true; } else { frame->is_local_dest = false; } } static void handle_std_frame(struct sk_buff *skb, struct hsr_frame_info *frame) { struct hsr_port *port = frame->port_rcv; struct hsr_priv *hsr = port->hsr; frame->skb_hsr = NULL; frame->skb_prp = NULL; frame->skb_std = skb; if (port->type != HSR_PT_MASTER) frame->is_from_san = true; if (port->type == HSR_PT_MASTER || port->type == HSR_PT_INTERLINK) { /* Sequence nr for the master/interlink node */ lockdep_assert_held(&hsr->seqnr_lock); frame->sequence_nr = hsr->sequence_nr; hsr->sequence_nr++; } } int hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, struct hsr_frame_info *frame) { struct hsr_port *port = frame->port_rcv; struct hsr_priv *hsr = port->hsr; /* HSRv0 supervisory frames double as a tag so treat them as tagged. 
*/ if ((!hsr->prot_version && proto == htons(ETH_P_PRP)) || proto == htons(ETH_P_HSR)) { /* Check if skb contains hsr_ethhdr */ if (skb->mac_len < sizeof(struct hsr_ethhdr)) return -EINVAL; /* HSR tagged frame :- Data or Supervision */ frame->skb_std = NULL; frame->skb_prp = NULL; frame->skb_hsr = skb; frame->sequence_nr = hsr_get_skb_sequence_nr(skb); return 0; } /* Standard frame or PRP from master port */ handle_std_frame(skb, frame); return 0; } int prp_fill_frame_info(__be16 proto, struct sk_buff *skb, struct hsr_frame_info *frame) { /* Supervision frame */ struct prp_rct *rct = skb_get_PRP_rct(skb); if (rct && prp_check_lsdu_size(skb, rct, frame->is_supervision)) { frame->skb_hsr = NULL; frame->skb_std = NULL; frame->skb_prp = skb; frame->sequence_nr = prp_get_skb_sequence_nr(rct); return 0; } handle_std_frame(skb, frame); return 0; } static int fill_frame_info(struct hsr_frame_info *frame, struct sk_buff *skb, struct hsr_port *port) { struct hsr_priv *hsr = port->hsr; struct hsr_vlan_ethhdr *vlan_hdr; struct list_head *n_db; struct ethhdr *ethhdr; __be16 proto; int ret; /* Check if skb contains ethhdr */ if (skb->mac_len < sizeof(struct ethhdr)) return -EINVAL; memset(frame, 0, sizeof(*frame)); frame->is_supervision = is_supervision_frame(port->hsr, skb); if (frame->is_supervision && hsr->redbox) frame->is_proxy_supervision = is_proxy_supervision_frame(port->hsr, skb); n_db = &hsr->node_db; if (port->type == HSR_PT_INTERLINK) n_db = &hsr->proxy_node_db; frame->node_src = hsr_get_node(port, n_db, skb, frame->is_supervision, port->type); if (!frame->node_src) return -1; /* Unknown node and !is_supervision, or no mem */ ethhdr = (struct ethhdr *)skb_mac_header(skb); frame->is_vlan = false; proto = ethhdr->h_proto; if (proto == htons(ETH_P_8021Q)) frame->is_vlan = true; if (frame->is_vlan) { /* Note: skb->mac_len might be wrong here. */ if (!pskb_may_pull(skb, skb_mac_offset(skb) + offsetofend(struct hsr_vlan_ethhdr, vlanhdr))) return -EINVAL; vlan_hdr = (struct hsr_vlan_ethhdr *)skb_mac_header(skb); proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto; } frame->is_from_san = false; frame->port_rcv = port; ret = hsr->proto_ops->fill_frame_info(proto, skb, frame); if (ret) return ret; check_local_dest(port->hsr, skb, frame); return 0; } /* Must be called holding rcu read lock (because of the port parameter) */ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) { struct hsr_frame_info frame; rcu_read_lock(); if (fill_frame_info(&frame, skb, port) < 0) goto out_drop; hsr_register_frame_in(frame.node_src, port, frame.sequence_nr); hsr_forward_do(&frame); rcu_read_unlock(); /* Gets called for ingress frames as well as egress from master port. * So check and increment stats for master port only here. */ if (port->type == HSR_PT_MASTER || port->type == HSR_PT_INTERLINK) { port->dev->stats.tx_packets++; port->dev->stats.tx_bytes += skb->len; } kfree_skb(frame.skb_hsr); kfree_skb(frame.skb_prp); kfree_skb(frame.skb_std); return; out_drop: rcu_read_unlock(); port->dev->stats.tx_dropped++; kfree_skb(skb); }
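is_supervision_frame() above walks a chain of TLVs, making sure the header and value of each entry fit inside the frame before touching them and stopping at an end-of-TLVs marker of length zero. Below is a minimal, self-contained sketch of that bounds-checked walk; the byte layout and type values are made up for illustration and are not the HSR wire format.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* A bare type/length pair; the value bytes follow immediately after it. */
struct tlv {
	uint8_t type;
	uint8_t len;
};

static int walk_tlvs(const uint8_t *buf, size_t buflen)
{
	size_t off = 0;

	while (off + sizeof(struct tlv) <= buflen) {
		const struct tlv *t = (const struct tlv *)(buf + off);

		if (t->type == 0)			/* end-of-TLVs marker */
			return t->len == 0 ? 0 : -1;
		if (off + sizeof(struct tlv) + t->len > buflen)
			return -1;			/* value would overrun the frame */
		printf("tlv type %u len %u\n", (unsigned)t->type, (unsigned)t->len);
		off += sizeof(struct tlv) + t->len;
	}
	return -1;					/* no terminator seen */
}

int main(void)
{
	/* One made-up TLV (type 22, two value bytes) followed by a terminator. */
	const uint8_t frame[] = { 22, 2, 0xaa, 0xbb, 0, 0 };

	return walk_tlvs(frame, sizeof(frame));
}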
// SPDX-License-Identifier: GPL-2.0 /* * To speed up listener socket lookup, create an array to store all sockets * listening on the same port. This allows a decision to be made after finding * the first socket. An optional BPF program can also be configured for * selecting the socket index from the array of available sockets.
*/ #include <net/ip.h> #include <net/sock_reuseport.h> #include <linux/bpf.h> #include <linux/idr.h> #include <linux/filter.h> #include <linux/rcupdate.h> #define INIT_SOCKS 128 DEFINE_SPINLOCK(reuseport_lock); static DEFINE_IDA(reuseport_ida); static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse, struct sock_reuseport *reuse, bool bind_inany); void reuseport_has_conns_set(struct sock *sk) { struct sock_reuseport *reuse; if (!rcu_access_pointer(sk->sk_reuseport_cb)) return; spin_lock_bh(&reuseport_lock); reuse = rcu_dereference_protected(sk->sk_reuseport_cb, lockdep_is_held(&reuseport_lock)); if (likely(reuse)) reuse->has_conns = 1; spin_unlock_bh(&reuseport_lock); } EXPORT_SYMBOL(reuseport_has_conns_set); static void __reuseport_get_incoming_cpu(struct sock_reuseport *reuse) { /* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */ WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu + 1); } static void __reuseport_put_incoming_cpu(struct sock_reuseport *reuse) { /* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */ WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu - 1); } static void reuseport_get_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse) { if (sk->sk_incoming_cpu >= 0) __reuseport_get_incoming_cpu(reuse); } static void reuseport_put_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse) { if (sk->sk_incoming_cpu >= 0) __reuseport_put_incoming_cpu(reuse); } void reuseport_update_incoming_cpu(struct sock *sk, int val) { struct sock_reuseport *reuse; int old_sk_incoming_cpu; if (unlikely(!rcu_access_pointer(sk->sk_reuseport_cb))) { /* Paired with READ_ONCE() in sk_incoming_cpu_update() * and compute_score(). */ WRITE_ONCE(sk->sk_incoming_cpu, val); return; } spin_lock_bh(&reuseport_lock); /* This must be done under reuseport_lock to avoid a race with * reuseport_grow(), which accesses sk->sk_incoming_cpu without * lock_sock() when detaching a shutdown()ed sk. * * Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */ old_sk_incoming_cpu = sk->sk_incoming_cpu; WRITE_ONCE(sk->sk_incoming_cpu, val); reuse = rcu_dereference_protected(sk->sk_reuseport_cb, lockdep_is_held(&reuseport_lock)); /* reuseport_grow() has detached a closed sk.
*/ if (!reuse) goto out; if (old_sk_incoming_cpu < 0 && val >= 0) __reuseport_get_incoming_cpu(reuse); else if (old_sk_incoming_cpu >= 0 && val < 0) __reuseport_put_incoming_cpu(reuse); out: spin_unlock_bh(&reuseport_lock); } static int reuseport_sock_index(struct sock *sk, const struct sock_reuseport *reuse, bool closed) { int left, right; if (!closed) { left = 0; right = reuse->num_socks; } else { left = reuse->max_socks - reuse->num_closed_socks; right = reuse->max_socks; } for (; left < right; left++) if (reuse->socks[left] == sk) return left; return -1; } static void __reuseport_add_sock(struct sock *sk, struct sock_reuseport *reuse) { reuse->socks[reuse->num_socks] = sk; /* paired with smp_rmb() in reuseport_(select|migrate)_sock() */ smp_wmb(); reuse->num_socks++; reuseport_get_incoming_cpu(sk, reuse); } static bool __reuseport_detach_sock(struct sock *sk, struct sock_reuseport *reuse) { int i = reuseport_sock_index(sk, reuse, false); if (i == -1) return false; reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; reuse->num_socks--; reuseport_put_incoming_cpu(sk, reuse); return true; } static void __reuseport_add_closed_sock(struct sock *sk, struct sock_reuseport *reuse) { reuse->socks[reuse->max_socks - reuse->num_closed_socks - 1] = sk; /* paired with READ_ONCE() in inet_csk_bind_conflict() */ WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks + 1); reuseport_get_incoming_cpu(sk, reuse); } static bool __reuseport_detach_closed_sock(struct sock *sk, struct sock_reuseport *reuse) { int i = reuseport_sock_index(sk, reuse, true); if (i == -1) return false; reuse->socks[i] = reuse->socks[reuse->max_socks - reuse->num_closed_socks]; /* paired with READ_ONCE() in inet_csk_bind_conflict() */ WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks - 1); reuseport_put_incoming_cpu(sk, reuse); return true; } static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks) { struct sock_reuseport *reuse; reuse = kzalloc(struct_size(reuse, socks, max_socks), GFP_ATOMIC); if (!reuse) return NULL; reuse->max_socks = max_socks; RCU_INIT_POINTER(reuse->prog, NULL); return reuse; } int reuseport_alloc(struct sock *sk, bool bind_inany) { struct sock_reuseport *reuse; int id, ret = 0; /* bh lock used since this function call may precede hlist lock in * soft irq of receive path or setsockopt from process context */ spin_lock_bh(&reuseport_lock); /* Allocation attempts can occur concurrently via the setsockopt path * and the bind/hash path. Nothing to do when we lose the race. */ reuse = rcu_dereference_protected(sk->sk_reuseport_cb, lockdep_is_held(&reuseport_lock)); if (reuse) { if (reuse->num_closed_socks) { /* sk was shutdown()ed before */ ret = reuseport_resurrect(sk, reuse, NULL, bind_inany); goto out; } /* Only set reuse->bind_inany if the bind_inany is true. * Otherwise, it will overwrite the reuse->bind_inany * which was set by the bind/hash path. 
*/ if (bind_inany) reuse->bind_inany = bind_inany; goto out; } reuse = __reuseport_alloc(INIT_SOCKS); if (!reuse) { ret = -ENOMEM; goto out; } id = ida_alloc(&reuseport_ida, GFP_ATOMIC); if (id < 0) { kfree(reuse); ret = id; goto out; } reuse->reuseport_id = id; reuse->bind_inany = bind_inany; reuse->socks[0] = sk; reuse->num_socks = 1; reuseport_get_incoming_cpu(sk, reuse); rcu_assign_pointer(sk->sk_reuseport_cb, reuse); out: spin_unlock_bh(&reuseport_lock); return ret; } EXPORT_SYMBOL(reuseport_alloc); static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) { struct sock_reuseport *more_reuse; u32 more_socks_size, i; more_socks_size = reuse->max_socks * 2U; if (more_socks_size > U16_MAX) { if (reuse->num_closed_socks) { /* Make room by removing a closed sk. * The child has already been migrated. * Only reqsk left at this point. */ struct sock *sk; sk = reuse->socks[reuse->max_socks - reuse->num_closed_socks]; RCU_INIT_POINTER(sk->sk_reuseport_cb, NULL); __reuseport_detach_closed_sock(sk, reuse); return reuse; } return NULL; } more_reuse = __reuseport_alloc(more_socks_size); if (!more_reuse) return NULL; more_reuse->num_socks = reuse->num_socks; more_reuse->num_closed_socks = reuse->num_closed_socks; more_reuse->prog = reuse->prog; more_reuse->reuseport_id = reuse->reuseport_id; more_reuse->bind_inany = reuse->bind_inany; more_reuse->has_conns = reuse->has_conns; more_reuse->incoming_cpu = reuse->incoming_cpu; memcpy(more_reuse->socks, reuse->socks, reuse->num_socks * sizeof(struct sock *)); memcpy(more_reuse->socks + (more_reuse->max_socks - more_reuse->num_closed_socks), reuse->socks + (reuse->max_socks - reuse->num_closed_socks), reuse->num_closed_socks * sizeof(struct sock *)); more_reuse->synq_overflow_ts = READ_ONCE(reuse->synq_overflow_ts); for (i = 0; i < reuse->max_socks; ++i) rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, more_reuse); /* Note: we use kfree_rcu here instead of reuseport_free_rcu so * that reuse and more_reuse can temporarily share a reference * to prog. */ kfree_rcu(reuse, rcu); return more_reuse; } static void reuseport_free_rcu(struct rcu_head *head) { struct sock_reuseport *reuse; reuse = container_of(head, struct sock_reuseport, rcu); sk_reuseport_prog_free(rcu_dereference_protected(reuse->prog, 1)); ida_free(&reuseport_ida, reuse->reuseport_id); kfree(reuse); } /** * reuseport_add_sock - Add a socket to the reuseport group of another. * @sk: New socket to add to the group. * @sk2: Socket belonging to the existing reuseport group. * @bind_inany: Whether or not the group is bound to a local INANY address. * * May return ENOMEM and not add socket to group under memory pressure. 
*/ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany) { struct sock_reuseport *old_reuse, *reuse; if (!rcu_access_pointer(sk2->sk_reuseport_cb)) { int err = reuseport_alloc(sk2, bind_inany); if (err) return err; } spin_lock_bh(&reuseport_lock); reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, lockdep_is_held(&reuseport_lock)); old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb, lockdep_is_held(&reuseport_lock)); if (old_reuse && old_reuse->num_closed_socks) { /* sk was shutdown()ed before */ int err = reuseport_resurrect(sk, old_reuse, reuse, reuse->bind_inany); spin_unlock_bh(&reuseport_lock); return err; } if (old_reuse && old_reuse->num_socks != 1) { spin_unlock_bh(&reuseport_lock); return -EBUSY; } if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) { reuse = reuseport_grow(reuse); if (!reuse) { spin_unlock_bh(&reuseport_lock); return -ENOMEM; } } __reuseport_add_sock(sk, reuse); rcu_assign_pointer(sk->sk_reuseport_cb, reuse); spin_unlock_bh(&reuseport_lock); if (old_reuse) call_rcu(&old_reuse->rcu, reuseport_free_rcu); return 0; } EXPORT_SYMBOL(reuseport_add_sock); static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse, struct sock_reuseport *reuse, bool bind_inany) { if (old_reuse == reuse) { /* If sk was in the same reuseport group, just pop sk out of * the closed section and push sk into the listening section. */ __reuseport_detach_closed_sock(sk, old_reuse); __reuseport_add_sock(sk, old_reuse); return 0; } if (!reuse) { /* In bind()/listen() path, we cannot carry over the eBPF prog * for the shutdown()ed socket. In setsockopt() path, we should * not change the eBPF prog of listening sockets by attaching a * prog to the shutdown()ed socket. Thus, we will allocate a new * reuseport group and detach sk from the old group. */ int id; reuse = __reuseport_alloc(INIT_SOCKS); if (!reuse) return -ENOMEM; id = ida_alloc(&reuseport_ida, GFP_ATOMIC); if (id < 0) { kfree(reuse); return id; } reuse->reuseport_id = id; reuse->bind_inany = bind_inany; } else { /* Move sk from the old group to the new one if * - all the other listeners in the old group were close()d or * shutdown()ed, and then sk2 has listen()ed on the same port * OR * - sk listen()ed without bind() (or with autobind), was * shutdown()ed, and then listen()s on another port which * sk2 listen()s on. */ if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) { reuse = reuseport_grow(reuse); if (!reuse) return -ENOMEM; } } __reuseport_detach_closed_sock(sk, old_reuse); __reuseport_add_sock(sk, reuse); rcu_assign_pointer(sk->sk_reuseport_cb, reuse); if (old_reuse->num_socks + old_reuse->num_closed_socks == 0) call_rcu(&old_reuse->rcu, reuseport_free_rcu); return 0; } void reuseport_detach_sock(struct sock *sk) { struct sock_reuseport *reuse; spin_lock_bh(&reuseport_lock); reuse = rcu_dereference_protected(sk->sk_reuseport_cb, lockdep_is_held(&reuseport_lock)); /* reuseport_grow() has detached a closed sk */ if (!reuse) goto out; /* Notify the bpf side. The sk may be added to a sockarray * map. If so, sockarray logic will remove it from the map. * * Other bpf map types that work with reuseport, like sockmap, * don't need an explicit callback from here. They override sk * unhash/close ops to remove the sk from the map before we * get to this point. 
*/ bpf_sk_reuseport_detach(sk); rcu_assign_pointer(sk->sk_reuseport_cb, NULL); if (!__reuseport_detach_closed_sock(sk, reuse)) __reuseport_detach_sock(sk, reuse); if (reuse->num_socks + reuse->num_closed_socks == 0) call_rcu(&reuse->rcu, reuseport_free_rcu); out: spin_unlock_bh(&reuseport_lock); } EXPORT_SYMBOL(reuseport_detach_sock); void reuseport_stop_listen_sock(struct sock *sk) { if (sk->sk_protocol == IPPROTO_TCP) { struct sock_reuseport *reuse; struct bpf_prog *prog; spin_lock_bh(&reuseport_lock); reuse = rcu_dereference_protected(sk->sk_reuseport_cb, lockdep_is_held(&reuseport_lock)); prog = rcu_dereference_protected(reuse->prog, lockdep_is_held(&reuseport_lock)); if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) || (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) { /* Migration capable, move sk from the listening section * to the closed section. */ bpf_sk_reuseport_detach(sk); __reuseport_detach_sock(sk, reuse); __reuseport_add_closed_sock(sk, reuse); spin_unlock_bh(&reuseport_lock); return; } spin_unlock_bh(&reuseport_lock); } /* Not capable to do migration, detach immediately */ reuseport_detach_sock(sk); } EXPORT_SYMBOL(reuseport_stop_listen_sock); static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks, struct bpf_prog *prog, struct sk_buff *skb, int hdr_len) { struct sk_buff *nskb = NULL; u32 index; if (skb_shared(skb)) { nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return NULL; skb = nskb; } /* temporarily advance data past protocol header */ if (!pskb_pull(skb, hdr_len)) { kfree_skb(nskb); return NULL; } index = bpf_prog_run_save_cb(prog, skb); __skb_push(skb, hdr_len); consume_skb(nskb); if (index >= socks) return NULL; return reuse->socks[index]; } static struct sock *reuseport_select_sock_by_hash(struct sock_reuseport *reuse, u32 hash, u16 num_socks) { struct sock *first_valid_sk = NULL; int i, j; i = j = reciprocal_scale(hash, num_socks); do { struct sock *sk = reuse->socks[i]; if (sk->sk_state != TCP_ESTABLISHED) { /* Paired with WRITE_ONCE() in __reuseport_(get|put)_incoming_cpu(). */ if (!READ_ONCE(reuse->incoming_cpu)) return sk; /* Paired with WRITE_ONCE() in reuseport_update_incoming_cpu(). */ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) return sk; if (!first_valid_sk) first_valid_sk = sk; } i++; if (i >= num_socks) i = 0; } while (i != j); return first_valid_sk; } /** * reuseport_select_sock - Select a socket from an SO_REUSEPORT group. * @sk: First socket in the group. * @hash: When no BPF filter is available, use this hash to select. * @skb: skb to run through BPF filter. * @hdr_len: BPF filter expects skb data pointer at payload data. If * the skb does not yet point at the payload, this parameter represents * how far the pointer needs to advance to reach the payload. * Returns a socket that should receive the packet (or NULL on error). 
*/ struct sock *reuseport_select_sock(struct sock *sk, u32 hash, struct sk_buff *skb, int hdr_len) { struct sock_reuseport *reuse; struct bpf_prog *prog; struct sock *sk2 = NULL; u16 socks; rcu_read_lock(); reuse = rcu_dereference(sk->sk_reuseport_cb); /* if memory allocation failed or add call is not yet complete */ if (!reuse) goto out; prog = rcu_dereference(reuse->prog); socks = READ_ONCE(reuse->num_socks); if (likely(socks)) { /* paired with smp_wmb() in __reuseport_add_sock() */ smp_rmb(); if (!prog || !skb) goto select_by_hash; if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, NULL, hash); else sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len); select_by_hash: /* no bpf or invalid bpf result: fall back to hash usage */ if (!sk2) sk2 = reuseport_select_sock_by_hash(reuse, hash, socks); } out: rcu_read_unlock(); return sk2; } EXPORT_SYMBOL(reuseport_select_sock); /** * reuseport_migrate_sock - Select a socket from an SO_REUSEPORT group. * @sk: close()ed or shutdown()ed socket in the group. * @migrating_sk: ESTABLISHED/SYN_RECV full socket in the accept queue or * NEW_SYN_RECV request socket during 3WHS. * @skb: skb to run through BPF filter. * Returns a socket (with sk_refcnt +1) that should accept the child socket * (or NULL on error). */ struct sock *reuseport_migrate_sock(struct sock *sk, struct sock *migrating_sk, struct sk_buff *skb) { struct sock_reuseport *reuse; struct sock *nsk = NULL; bool allocated = false; struct bpf_prog *prog; u16 socks; u32 hash; rcu_read_lock(); reuse = rcu_dereference(sk->sk_reuseport_cb); if (!reuse) goto out; socks = READ_ONCE(reuse->num_socks); if (unlikely(!socks)) goto failure; /* paired with smp_wmb() in __reuseport_add_sock() */ smp_rmb(); hash = migrating_sk->sk_hash; prog = rcu_dereference(reuse->prog); if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) { if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req)) goto select_by_hash; goto failure; } if (!skb) { skb = alloc_skb(0, GFP_ATOMIC); if (!skb) goto failure; allocated = true; } nsk = bpf_run_sk_reuseport(reuse, sk, prog, skb, migrating_sk, hash); if (allocated) kfree_skb(skb); select_by_hash: if (!nsk) nsk = reuseport_select_sock_by_hash(reuse, hash, socks); if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt))) { nsk = NULL; goto failure; } out: rcu_read_unlock(); return nsk; failure: __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); goto out; } EXPORT_SYMBOL(reuseport_migrate_sock); int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog) { struct sock_reuseport *reuse; struct bpf_prog *old_prog; if (sk_unhashed(sk)) { int err; if (!sk->sk_reuseport) return -EINVAL; err = reuseport_alloc(sk, false); if (err) return err; } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) { /* The socket wasn't bound with SO_REUSEPORT */ return -EINVAL; } spin_lock_bh(&reuseport_lock); reuse = rcu_dereference_protected(sk->sk_reuseport_cb, lockdep_is_held(&reuseport_lock)); old_prog = rcu_dereference_protected(reuse->prog, lockdep_is_held(&reuseport_lock)); rcu_assign_pointer(reuse->prog, prog); spin_unlock_bh(&reuseport_lock); sk_reuseport_prog_free(old_prog); return 0; } EXPORT_SYMBOL(reuseport_attach_prog); int reuseport_detach_prog(struct sock *sk) { struct sock_reuseport *reuse; struct bpf_prog *old_prog; old_prog = NULL; spin_lock_bh(&reuseport_lock); reuse = rcu_dereference_protected(sk->sk_reuseport_cb, lockdep_is_held(&reuseport_lock)); /* reuse must be checked after 
acquiring the reuseport_lock * because reuseport_grow() can detach a closed sk. */ if (!reuse) { spin_unlock_bh(&reuseport_lock); return sk->sk_reuseport ? -ENOENT : -EINVAL; } if (sk_unhashed(sk) && reuse->num_closed_socks) { spin_unlock_bh(&reuseport_lock); return -ENOENT; } old_prog = rcu_replace_pointer(reuse->prog, old_prog, lockdep_is_held(&reuseport_lock)); spin_unlock_bh(&reuseport_lock); if (!old_prog) return -ENOENT; sk_reuseport_prog_free(old_prog); return 0; } EXPORT_SYMBOL(reuseport_detach_prog);
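The group management above is driven entirely by sockets that opt in with SO_REUSEPORT before binding to a shared address and port: the first bind allocates the group (reuseport_alloc()) and later binds join it (reuseport_add_sock()). A minimal user-space sketch of two UDP sockets joining one group (the port number and the lack of error reporting are deliberate simplifications):

#include <stdint.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>

/* Open a UDP socket that joins the SO_REUSEPORT group on the given port. */
static int open_reuseport_socket(uint16_t port)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port = htons(port),
	};
	int one = 1;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	/* Must be set before bind() so the kernel links the socket into
	 * (or creates) the reuseport group for this port. */
	if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0 ||
	    bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}

int main(void)
{
	int a = open_reuseport_socket(7777);	/* creates the group */
	int b = open_reuseport_socket(7777);	/* joins the same group */

	if (a < 0 || b < 0)
		return 1;
	/* Incoming datagrams to port 7777 are now distributed across a and b. */
	close(a);
	close(b);
	return 0;
}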
// SPDX-License-Identifier: GPL-2.0-only /* * A driver for the Griffin Technology, Inc. "PowerMate" USB controller dial. * * v1.1, (c)2002 William R Sowerbutts <will@sowerbutts.com> * * This device is an anodised aluminium knob which connects over USB. It can measure * clockwise and anticlockwise rotation. The dial also acts as a pushbutton with * a spring for automatic release. The base contains a pair of LEDs which illuminate * the translucent base. It rotates without limit and reports its relative rotation * back to the host when polled by the USB controller. * * Testing with the knob I have has shown that it measures approximately 94 "clicks" * for one full rotation. Testing with my High Speed Rotation Actuator (ok, it was * a variable speed cordless electric drill) has shown that the device can measure * speeds of up to 7 clicks either clockwise or anticlockwise between pollings from * the host. If it counts more than 7 clicks before it is polled, it will wrap back * to zero and start counting again. This was at quite high speed, however, almost * certainly faster than the human hand could turn it. Griffin say that it loses a * pulse or two on a direction change; the granularity is so fine that I never * noticed this in practice. * * The device's microcontroller can be programmed to set the LED to either a constant * intensity, or to a rhythmic pulsing. Several patterns and speeds are available. * * Griffin were very happy to provide documentation and free hardware for development. * * Some userspace tools are available on the web: http://sowerbutts.com/powermate/ * */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/usb/input.h> #define POWERMATE_VENDOR 0x077d /* Griffin Technology, Inc.
*/ #define POWERMATE_PRODUCT_NEW 0x0410 /* Griffin PowerMate */ #define POWERMATE_PRODUCT_OLD 0x04AA /* Griffin soundKnob */ #define CONTOUR_VENDOR 0x05f3 /* Contour Design, Inc. */ #define CONTOUR_JOG 0x0240 /* Jog and Shuttle */ /* these are the command codes we send to the device */ #define SET_STATIC_BRIGHTNESS 0x01 #define SET_PULSE_ASLEEP 0x02 #define SET_PULSE_AWAKE 0x03 #define SET_PULSE_MODE 0x04 /* these refer to bits in the powermate_device's requires_update field. */ #define UPDATE_STATIC_BRIGHTNESS (1<<0) #define UPDATE_PULSE_ASLEEP (1<<1) #define UPDATE_PULSE_AWAKE (1<<2) #define UPDATE_PULSE_MODE (1<<3) /* at least two versions of the hardware exist, with differing payload sizes. the first three bytes always contain the "interesting" data in the relevant format. */ #define POWERMATE_PAYLOAD_SIZE_MAX 6 #define POWERMATE_PAYLOAD_SIZE_MIN 3 struct powermate_device { signed char *data; dma_addr_t data_dma; struct urb *irq, *config; struct usb_ctrlrequest *configcr; struct usb_device *udev; struct usb_interface *intf; struct input_dev *input; spinlock_t lock; int static_brightness; int pulse_speed; int pulse_table; int pulse_asleep; int pulse_awake; int requires_update; // physical settings which are out of sync char phys[64]; }; static char pm_name_powermate[] = "Griffin PowerMate"; static char pm_name_soundknob[] = "Griffin SoundKnob"; static void powermate_config_complete(struct urb *urb); /* Callback for data arriving from the PowerMate over the USB interrupt pipe */ static void powermate_irq(struct urb *urb) { struct powermate_device *pm = urb->context; struct device *dev = &pm->intf->dev; int retval; switch (urb->status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, urb->status); return; default: dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, urb->status); goto exit; } /* handle updates to device state */ input_report_key(pm->input, BTN_0, pm->data[0] & 0x01); input_report_rel(pm->input, REL_DIAL, pm->data[1]); input_sync(pm->input); exit: retval = usb_submit_urb (urb, GFP_ATOMIC); if (retval) dev_err(dev, "%s - usb_submit_urb failed with result: %d\n", __func__, retval); } /* Decide if we need to issue a control message and do so. Must be called with pm->lock taken */ static void powermate_sync_state(struct powermate_device *pm) { if (pm->requires_update == 0) return; /* no updates are required */ if (pm->config->status == -EINPROGRESS) return; /* an update is already in progress; it'll issue this update when it completes */ if (pm->requires_update & UPDATE_PULSE_ASLEEP){ pm->configcr->wValue = cpu_to_le16( SET_PULSE_ASLEEP ); pm->configcr->wIndex = cpu_to_le16( pm->pulse_asleep ? 1 : 0 ); pm->requires_update &= ~UPDATE_PULSE_ASLEEP; }else if (pm->requires_update & UPDATE_PULSE_AWAKE){ pm->configcr->wValue = cpu_to_le16( SET_PULSE_AWAKE ); pm->configcr->wIndex = cpu_to_le16( pm->pulse_awake ? 1 : 0 ); pm->requires_update &= ~UPDATE_PULSE_AWAKE; }else if (pm->requires_update & UPDATE_PULSE_MODE){ int op, arg; /* the powermate takes an operation and an argument for its pulse algorithm. the operation can be: 0: divide the speed 1: pulse at normal speed 2: multiply the speed the argument only has an effect for operations 0 and 2, and ranges between 1 (least effect) to 255 (maximum effect). thus, several states are equivalent and are coalesced into one state. 
we map this onto a range from 0 to 510, with: 0 -- 254 -- use divide (0 = slowest) 255 -- use normal speed 256 -- 510 -- use multiple (510 = fastest). Only values of 'arg' quite close to 255 are particularly useful/spectacular. */ if (pm->pulse_speed < 255) { op = 0; // divide arg = 255 - pm->pulse_speed; } else if (pm->pulse_speed > 255) { op = 2; // multiply arg = pm->pulse_speed - 255; } else { op = 1; // normal speed arg = 0; // can be any value } pm->configcr->wValue = cpu_to_le16( (pm->pulse_table << 8) | SET_PULSE_MODE ); pm->configcr->wIndex = cpu_to_le16( (arg << 8) | op ); pm->requires_update &= ~UPDATE_PULSE_MODE; } else if (pm->requires_update & UPDATE_STATIC_BRIGHTNESS) { pm->configcr->wValue = cpu_to_le16( SET_STATIC_BRIGHTNESS ); pm->configcr->wIndex = cpu_to_le16( pm->static_brightness ); pm->requires_update &= ~UPDATE_STATIC_BRIGHTNESS; } else { printk(KERN_ERR "powermate: unknown update required"); pm->requires_update = 0; /* fudge the bug */ return; } /* printk("powermate: %04x %04x\n", pm->configcr->wValue, pm->configcr->wIndex); */ pm->configcr->bRequestType = 0x41; /* vendor request */ pm->configcr->bRequest = 0x01; pm->configcr->wLength = 0; usb_fill_control_urb(pm->config, pm->udev, usb_sndctrlpipe(pm->udev, 0), (void *) pm->configcr, NULL, 0, powermate_config_complete, pm); if (usb_submit_urb(pm->config, GFP_ATOMIC)) printk(KERN_ERR "powermate: usb_submit_urb(config) failed"); } /* Called when our asynchronous control message completes. We may need to issue another immediately */ static void powermate_config_complete(struct urb *urb) { struct powermate_device *pm = urb->context; if (urb->status) printk(KERN_ERR "powermate: config urb returned %d\n", urb->status); guard(spinlock_irqsave)(&pm->lock); powermate_sync_state(pm); } /* Set the LED up as described and begin the sync with the hardware if required */ static void powermate_pulse_led(struct powermate_device *pm, int static_brightness, int pulse_speed, int pulse_table, int pulse_asleep, int pulse_awake) { if (pulse_speed < 0) pulse_speed = 0; if (pulse_table < 0) pulse_table = 0; if (pulse_speed > 510) pulse_speed = 510; if (pulse_table > 2) pulse_table = 2; pulse_asleep = !!pulse_asleep; pulse_awake = !!pulse_awake; guard(spinlock_irqsave)(&pm->lock); /* mark state updates which are required */ if (static_brightness != pm->static_brightness) { pm->static_brightness = static_brightness; pm->requires_update |= UPDATE_STATIC_BRIGHTNESS; } if (pulse_asleep != pm->pulse_asleep) { pm->pulse_asleep = pulse_asleep; pm->requires_update |= (UPDATE_PULSE_ASLEEP | UPDATE_STATIC_BRIGHTNESS); } if (pulse_awake != pm->pulse_awake) { pm->pulse_awake = pulse_awake; pm->requires_update |= (UPDATE_PULSE_AWAKE | UPDATE_STATIC_BRIGHTNESS); } if (pulse_speed != pm->pulse_speed || pulse_table != pm->pulse_table) { pm->pulse_speed = pulse_speed; pm->pulse_table = pulse_table; pm->requires_update |= UPDATE_PULSE_MODE; } powermate_sync_state(pm); } /* Callback from the Input layer when an event arrives from userspace to configure the LED */ static int powermate_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int _value) { unsigned int command = (unsigned int)_value; struct powermate_device *pm = input_get_drvdata(dev); if (type == EV_MSC && code == MSC_PULSELED){ /* bits 0- 7: 8 bits: LED brightness bits 8-16: 9 bits: pulsing speed modifier (0 ... 510); 0-254 = slower, 255 = standard, 256-510 = faster. bits 17-18: 2 bits: pulse table (0, 1, 2 valid) bit 19: 1 bit : pulse whilst asleep? 
bit 20: 1 bit : pulse constantly? */ int static_brightness = command & 0xFF; // bits 0-7 int pulse_speed = (command >> 8) & 0x1FF; // bits 8-16 int pulse_table = (command >> 17) & 0x3; // bits 17-18 int pulse_asleep = (command >> 19) & 0x1; // bit 19 int pulse_awake = (command >> 20) & 0x1; // bit 20 powermate_pulse_led(pm, static_brightness, pulse_speed, pulse_table, pulse_asleep, pulse_awake); } return 0; } static int powermate_alloc_buffers(struct usb_device *udev, struct powermate_device *pm) { pm->data = usb_alloc_coherent(udev, POWERMATE_PAYLOAD_SIZE_MAX, GFP_KERNEL, &pm->data_dma); if (!pm->data) return -1; pm->configcr = kmalloc(sizeof(*(pm->configcr)), GFP_KERNEL); if (!pm->configcr) return -ENOMEM; return 0; } static void powermate_free_buffers(struct usb_device *udev, struct powermate_device *pm) { usb_free_coherent(udev, POWERMATE_PAYLOAD_SIZE_MAX, pm->data, pm->data_dma); kfree(pm->configcr); } /* Called whenever a USB device matching one in our supported devices table is connected */ static int powermate_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev (intf); struct usb_host_interface *interface; struct usb_endpoint_descriptor *endpoint; struct powermate_device *pm; struct input_dev *input_dev; int pipe, maxp; int error = -ENOMEM; interface = intf->cur_altsetting; if (interface->desc.bNumEndpoints < 1) return -EINVAL; endpoint = &interface->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) return -EIO; usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x0a, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, interface->desc.bInterfaceNumber, NULL, 0, USB_CTRL_SET_TIMEOUT); pm = kzalloc(sizeof(*pm), GFP_KERNEL); input_dev = input_allocate_device(); if (!pm || !input_dev) goto fail1; if (powermate_alloc_buffers(udev, pm)) goto fail2; pm->irq = usb_alloc_urb(0, GFP_KERNEL); if (!pm->irq) goto fail2; pm->config = usb_alloc_urb(0, GFP_KERNEL); if (!pm->config) goto fail3; pm->udev = udev; pm->intf = intf; pm->input = input_dev; usb_make_path(udev, pm->phys, sizeof(pm->phys)); strlcat(pm->phys, "/input0", sizeof(pm->phys)); spin_lock_init(&pm->lock); switch (le16_to_cpu(udev->descriptor.idProduct)) { case POWERMATE_PRODUCT_NEW: input_dev->name = pm_name_powermate; break; case POWERMATE_PRODUCT_OLD: input_dev->name = pm_name_soundknob; break; default: input_dev->name = pm_name_soundknob; printk(KERN_WARNING "powermate: unknown product id %04x\n", le16_to_cpu(udev->descriptor.idProduct)); } input_dev->phys = pm->phys; usb_to_input_id(udev, &input_dev->id); input_dev->dev.parent = &intf->dev; input_set_drvdata(input_dev, pm); input_dev->event = powermate_input_event; input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) | BIT_MASK(EV_MSC); input_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0); input_dev->relbit[BIT_WORD(REL_DIAL)] = BIT_MASK(REL_DIAL); input_dev->mscbit[BIT_WORD(MSC_PULSELED)] = BIT_MASK(MSC_PULSELED); /* get a handle to the interrupt data pipe */ pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress); maxp = usb_maxpacket(udev, pipe); if (maxp < POWERMATE_PAYLOAD_SIZE_MIN || maxp > POWERMATE_PAYLOAD_SIZE_MAX) { printk(KERN_WARNING "powermate: Expected payload of %d--%d bytes, found %d bytes!\n", POWERMATE_PAYLOAD_SIZE_MIN, POWERMATE_PAYLOAD_SIZE_MAX, maxp); maxp = POWERMATE_PAYLOAD_SIZE_MAX; } usb_fill_int_urb(pm->irq, udev, pipe, pm->data, maxp, powermate_irq, pm, endpoint->bInterval); pm->irq->transfer_dma = pm->data_dma; pm->irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; /* register our interrupt URB 
with the USB system */ if (usb_submit_urb(pm->irq, GFP_KERNEL)) { error = -EIO; goto fail4; } error = input_register_device(pm->input); if (error) goto fail5; /* force an update of everything */ pm->requires_update = UPDATE_PULSE_ASLEEP | UPDATE_PULSE_AWAKE | UPDATE_PULSE_MODE | UPDATE_STATIC_BRIGHTNESS; powermate_pulse_led(pm, 0x80, 255, 0, 1, 0); // set default pulse parameters usb_set_intfdata(intf, pm); return 0; fail5: usb_kill_urb(pm->irq); fail4: usb_free_urb(pm->config); fail3: usb_free_urb(pm->irq); fail2: powermate_free_buffers(udev, pm); fail1: input_free_device(input_dev); kfree(pm); return error; } /* Called when a USB device we've accepted ownership of is removed */ static void powermate_disconnect(struct usb_interface *intf) { struct powermate_device *pm = usb_get_intfdata (intf); usb_set_intfdata(intf, NULL); if (pm) { pm->requires_update = 0; usb_kill_urb(pm->irq); input_unregister_device(pm->input); usb_kill_urb(pm->config); usb_free_urb(pm->irq); usb_free_urb(pm->config); powermate_free_buffers(interface_to_usbdev(intf), pm); kfree(pm); } } static const struct usb_device_id powermate_devices[] = { { USB_DEVICE(POWERMATE_VENDOR, POWERMATE_PRODUCT_NEW) }, { USB_DEVICE(POWERMATE_VENDOR, POWERMATE_PRODUCT_OLD) }, { USB_DEVICE(CONTOUR_VENDOR, CONTOUR_JOG) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE (usb, powermate_devices); static struct usb_driver powermate_driver = { .name = "powermate", .probe = powermate_probe, .disconnect = powermate_disconnect, .id_table = powermate_devices, }; module_usb_driver(powermate_driver); MODULE_AUTHOR( "William R Sowerbutts" ); MODULE_DESCRIPTION( "Griffin Technology, Inc PowerMate driver" ); MODULE_LICENSE("GPL");
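powermate_input_event() above accepts LED configuration from user space as a single packed value on an EV_MSC/MSC_PULSELED event (bits 0-7 brightness, bits 8-16 pulse speed, bits 17-18 pulse table, bit 19 pulse-asleep, bit 20 pulse-awake). A hedged sketch of injecting such an event through the evdev write() interface; the /dev/input/event5 path is an assumption and must be replaced with the node the PowerMate actually registered:

#include <fcntl.h>
#include <linux/input.h>
#include <string.h>
#include <unistd.h>

/* Pack the PowerMate LED settings the way powermate_input_event() expects
 * and inject them as an EV_MSC/MSC_PULSELED event. */
static int powermate_set_led(int fd, unsigned int brightness,
			     unsigned int speed, unsigned int table,
			     int pulse_asleep, int pulse_awake)
{
	struct input_event ev;
	unsigned int value = (brightness & 0xff) |
			     ((speed & 0x1ff) << 8) |
			     ((table & 0x3) << 17) |
			     ((!!pulse_asleep) << 19) |
			     ((!!pulse_awake) << 20);

	memset(&ev, 0, sizeof(ev));
	ev.type = EV_MSC;
	ev.code = MSC_PULSELED;
	ev.value = value;

	return write(fd, &ev, sizeof(ev)) == sizeof(ev) ? 0 : -1;
}

int main(void)
{
	/* Assumed device node; check /proc/bus/input/devices for the real one. */
	int fd = open("/dev/input/event5", O_WRONLY);

	if (fd < 0)
		return 1;
	/* Half brightness, normal pulse speed, table 0, pulse only when asleep. */
	powermate_set_led(fd, 0x80, 255, 0, 1, 0);
	close(fd);
	return 0;
}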
// SPDX-License-Identifier: GPL-2.0-or-later /* * Driver for USB webcams based on the Konica chipset. This * chipset is used in the Intel YC76 camera. * * Copyright (C) 2010 Hans de Goede <hdegoede@redhat.com> * * Based on the usbvideo v4l1 konicawc driver which is: * * Copyright (C) 2002 Simon Evans <spse@secret.org.uk> * * The code for making gspca work with a webcam with 2 isoc endpoints was * taken from the benq gspca subdriver which is: * * Copyright (C) 2009 Jean-Francois Moine (http://moinejf.free.fr) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "konica" #include <linux/input.h> #include "gspca.h" MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>"); MODULE_DESCRIPTION("Konica chipset USB Camera Driver"); MODULE_LICENSE("GPL"); #define WHITEBAL_REG 0x01 #define BRIGHTNESS_REG 0x02 #define SHARPNESS_REG 0x03 #define CONTRAST_REG 0x04 #define SATURATION_REG 0x05 /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !!
must be the first item */ struct urb *last_data_urb; u8 snapshot_pressed; }; /* .priv is what goes to register 8 for this mode, known working values: 0x00 -> 176x144, cropped 0x01 -> 176x144, cropped 0x02 -> 176x144, cropped 0x03 -> 176x144, cropped 0x04 -> 176x144, binned 0x05 -> 320x240 0x06 -> 320x240 0x07 -> 160x120, cropped 0x08 -> 160x120, cropped 0x09 -> 160x120, binned (note has 136 lines) 0x0a -> 160x120, binned (note has 136 lines) 0x0b -> 160x120, cropped */ static const struct v4l2_pix_format vga_mode[] = { {160, 120, V4L2_PIX_FMT_KONICA420, V4L2_FIELD_NONE, .bytesperline = 160, .sizeimage = 160 * 136 * 3 / 2 + 960, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0x0a}, {176, 144, V4L2_PIX_FMT_KONICA420, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144 * 3 / 2 + 960, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0x04}, {320, 240, V4L2_PIX_FMT_KONICA420, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 3 / 2 + 960, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0x05}, }; static void sd_isoc_irq(struct urb *urb); static void reg_w(struct gspca_dev *gspca_dev, u16 value, u16 index) { struct usb_device *dev = gspca_dev->dev; int ret; if (gspca_dev->usb_err < 0) return; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0x02, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 1000); if (ret < 0) { pr_err("reg_w err writing %02x to %02x: %d\n", value, index, ret); gspca_dev->usb_err = ret; } } static void reg_r(struct gspca_dev *gspca_dev, u16 value, u16 index) { struct usb_device *dev = gspca_dev->dev; int ret; if (gspca_dev->usb_err < 0) return; ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), 0x03, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, gspca_dev->usb_buf, 2, 1000); if (ret < 0) { pr_err("reg_r err %d\n", ret); gspca_dev->usb_err = ret; /* * Make sure the buffer is zeroed to avoid uninitialized * values. 
*/ memset(gspca_dev->usb_buf, 0, 2); } } static void konica_stream_on(struct gspca_dev *gspca_dev) { reg_w(gspca_dev, 1, 0x0b); } static void konica_stream_off(struct gspca_dev *gspca_dev) { reg_w(gspca_dev, 0, 0x0b); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { gspca_dev->cam.cam_mode = vga_mode; gspca_dev->cam.nmodes = ARRAY_SIZE(vga_mode); gspca_dev->cam.no_urb_create = 1; return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { int i; /* * The konica needs a freaking large time to "boot" (approx 6.5 sec.), * and does not want to be bothered while doing so :| * Register 0x10 counts from 1 - 3, with 3 being "ready" */ msleep(6000); for (i = 0; i < 20; i++) { reg_r(gspca_dev, 0, 0x10); if (gspca_dev->usb_buf[0] == 3) break; msleep(100); } reg_w(gspca_dev, 0, 0x0d); return gspca_dev->usb_err; } static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; struct urb *urb; int i, n, packet_size; struct usb_host_interface *alt; struct usb_interface *intf; intf = usb_ifnum_to_if(sd->gspca_dev.dev, sd->gspca_dev.iface); alt = usb_altnum_to_altsetting(intf, sd->gspca_dev.alt); if (!alt) { pr_err("Couldn't get altsetting\n"); return -EIO; } if (alt->desc.bNumEndpoints < 2) return -ENODEV; packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); n = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; reg_w(gspca_dev, n, 0x08); konica_stream_on(gspca_dev); if (gspca_dev->usb_err) return gspca_dev->usb_err; /* create 4 URBs - 2 on endpoint 0x83 and 2 on 0x082 */ #if MAX_NURBS < 4 #error "Not enough URBs in the gspca table" #endif #define SD_NPKT 32 for (n = 0; n < 4; n++) { i = n & 1 ? 0 : 1; packet_size = le16_to_cpu(alt->endpoint[i].desc.wMaxPacketSize); urb = usb_alloc_urb(SD_NPKT, GFP_KERNEL); if (!urb) return -ENOMEM; gspca_dev->urb[n] = urb; urb->transfer_buffer = usb_alloc_coherent(gspca_dev->dev, packet_size * SD_NPKT, GFP_KERNEL, &urb->transfer_dma); if (urb->transfer_buffer == NULL) { pr_err("usb_buffer_alloc failed\n"); return -ENOMEM; } urb->dev = gspca_dev->dev; urb->context = gspca_dev; urb->transfer_buffer_length = packet_size * SD_NPKT; urb->pipe = usb_rcvisocpipe(gspca_dev->dev, n & 1 ? 
0x81 : 0x82); urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; urb->interval = 1; urb->complete = sd_isoc_irq; urb->number_of_packets = SD_NPKT; for (i = 0; i < SD_NPKT; i++) { urb->iso_frame_desc[i].length = packet_size; urb->iso_frame_desc[i].offset = packet_size * i; } } return 0; } static void sd_stopN(struct gspca_dev *gspca_dev) { struct sd *sd __maybe_unused = (struct sd *) gspca_dev; konica_stream_off(gspca_dev); #if IS_ENABLED(CONFIG_INPUT) /* Don't keep the button in the pressed state "forever" if it was pressed when streaming is stopped */ if (sd->snapshot_pressed) { input_report_key(gspca_dev->input_dev, KEY_CAMERA, 0); input_sync(gspca_dev->input_dev); sd->snapshot_pressed = 0; } #endif } /* reception of an URB */ static void sd_isoc_irq(struct urb *urb) { struct gspca_dev *gspca_dev = (struct gspca_dev *) urb->context; struct sd *sd = (struct sd *) gspca_dev; struct urb *data_urb, *status_urb; u8 *data; int i, st; gspca_dbg(gspca_dev, D_PACK, "sd isoc irq\n"); if (!gspca_dev->streaming) return; if (urb->status != 0) { if (urb->status == -ESHUTDOWN) return; /* disconnection */ #ifdef CONFIG_PM if (gspca_dev->frozen) return; #endif gspca_err(gspca_dev, "urb status: %d\n", urb->status); st = usb_submit_urb(urb, GFP_ATOMIC); if (st < 0) pr_err("resubmit urb error %d\n", st); return; } /* if this is a data URB (ep 0x82), wait */ if (urb->transfer_buffer_length > 32) { sd->last_data_urb = urb; return; } status_urb = urb; data_urb = sd->last_data_urb; sd->last_data_urb = NULL; if (!data_urb || data_urb->start_frame != status_urb->start_frame) { gspca_err(gspca_dev, "lost sync on frames\n"); goto resubmit; } if (data_urb->number_of_packets != status_urb->number_of_packets) { gspca_err(gspca_dev, "no packets does not match, data: %d, status: %d\n", data_urb->number_of_packets, status_urb->number_of_packets); goto resubmit; } for (i = 0; i < status_urb->number_of_packets; i++) { if (data_urb->iso_frame_desc[i].status || status_urb->iso_frame_desc[i].status) { gspca_err(gspca_dev, "pkt %d data-status %d, status-status %d\n", i, data_urb->iso_frame_desc[i].status, status_urb->iso_frame_desc[i].status); gspca_dev->last_packet_type = DISCARD_PACKET; continue; } if (status_urb->iso_frame_desc[i].actual_length != 1) { gspca_err(gspca_dev, "bad status packet length %d\n", status_urb->iso_frame_desc[i].actual_length); gspca_dev->last_packet_type = DISCARD_PACKET; continue; } st = *((u8 *)status_urb->transfer_buffer + status_urb->iso_frame_desc[i].offset); data = (u8 *)data_urb->transfer_buffer + data_urb->iso_frame_desc[i].offset; /* st: 0x80-0xff: frame start with frame number (ie 0-7f) * otherwise: * bit 0 0: keep packet * 1: drop packet (padding data) * * bit 4 0 button not clicked * 1 button clicked * button is used to `take a picture' (in software) */ if (st & 0x80) { gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); gspca_frame_add(gspca_dev, FIRST_PACKET, NULL, 0); } else { #if IS_ENABLED(CONFIG_INPUT) u8 button_state = st & 0x40 ? 
1 : 0; if (sd->snapshot_pressed != button_state) { input_report_key(gspca_dev->input_dev, KEY_CAMERA, button_state); input_sync(gspca_dev->input_dev); sd->snapshot_pressed = button_state; } #endif if (st & 0x01) continue; } gspca_frame_add(gspca_dev, INTER_PACKET, data, data_urb->iso_frame_desc[i].actual_length); } resubmit: if (data_urb) { st = usb_submit_urb(data_urb, GFP_ATOMIC); if (st < 0) gspca_err(gspca_dev, "usb_submit_urb(data_urb) ret %d\n", st); } st = usb_submit_urb(status_urb, GFP_ATOMIC); if (st < 0) gspca_err(gspca_dev, "usb_submit_urb(status_urb) ret %d\n", st); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: konica_stream_off(gspca_dev); reg_w(gspca_dev, ctrl->val, BRIGHTNESS_REG); konica_stream_on(gspca_dev); break; case V4L2_CID_CONTRAST: konica_stream_off(gspca_dev); reg_w(gspca_dev, ctrl->val, CONTRAST_REG); konica_stream_on(gspca_dev); break; case V4L2_CID_SATURATION: konica_stream_off(gspca_dev); reg_w(gspca_dev, ctrl->val, SATURATION_REG); konica_stream_on(gspca_dev); break; case V4L2_CID_WHITE_BALANCE_TEMPERATURE: konica_stream_off(gspca_dev); reg_w(gspca_dev, ctrl->val, WHITEBAL_REG); konica_stream_on(gspca_dev); break; case V4L2_CID_SHARPNESS: konica_stream_off(gspca_dev); reg_w(gspca_dev, ctrl->val, SHARPNESS_REG); konica_stream_on(gspca_dev); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 5); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 9, 1, 4); /* Needs to be verified */ v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 9, 1, 4); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SATURATION, 0, 9, 1, 4); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_WHITE_BALANCE_TEMPERATURE, 0, 33, 1, 25); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SHARPNESS, 0, 9, 1, 4); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, #if IS_ENABLED(CONFIG_INPUT) .other_input = 1, #endif }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x04c8, 0x0720)}, /* Intel YC 76 */ {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
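The per-packet status byte handled in sd_isoc_irq() above drives three decisions: values 0x80 and above start a new frame, bit 0 marks padding packets to be dropped, and the 0x40 bit (as tested by the code) reflects the snapshot button. A small standalone sketch of that decision tree; the enum and helper are illustrative, not part of the driver:

#include <stdint.h>

enum konica_pkt_action {
	KONICA_PKT_FRAME_START,	/* status 0x80..0xff: a new frame begins */
	KONICA_PKT_KEEP,	/* image data, append to the current frame */
	KONICA_PKT_DROP,	/* padding data, discard */
};

/* Decode one status byte. *button is only updated for non-frame-start
 * packets, mirroring how the driver handles the snapshot button. */
static enum konica_pkt_action konica_decode_status(uint8_t st, int *button)
{
	if (st & 0x80) {
		/* Frame-start packets carry a frame counter, not a button bit. */
		return KONICA_PKT_FRAME_START;
	}
	*button = (st & 0x40) ? 1 : 0;
	return (st & 0x01) ? KONICA_PKT_DROP : KONICA_PKT_KEEP;
}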
// SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API. * * Glue code for the SHA1 Secure Hash Algorithm assembler implementations * using SSSE3, AVX, AVX2, and SHA-NI instructions. * * This file is based on sha1_generic.c * * Copyright (c) Alan Smithee. * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> * Copyright (c) Mathias Krause <minipli@googlemail.com> * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <asm/cpu_device_id.h> #include <asm/simd.h> #include <crypto/internal/hash.h> #include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/module.h> static const struct x86_cpu_id module_cpu_ids[] = { X86_MATCH_FEATURE(X86_FEATURE_SHA_NI, NULL), X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids); static inline int sha1_update(struct shash_desc *desc, const u8 *data, unsigned int len, sha1_block_fn *sha1_xform) { int remain; /* * Make sure struct sha1_state begins directly with the SHA1 * 160-bit internal state, as this is what the asm functions expect.
*/ BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); kernel_fpu_begin(); remain = sha1_base_do_update_blocks(desc, data, len, sha1_xform); kernel_fpu_end(); return remain; } static inline int sha1_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out, sha1_block_fn *sha1_xform) { kernel_fpu_begin(); sha1_base_do_finup(desc, data, len, sha1_xform); kernel_fpu_end(); return sha1_base_finish(desc, out); } asmlinkage void sha1_transform_ssse3(struct sha1_state *state, const u8 *data, int blocks); static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha1_update(desc, data, len, sha1_transform_ssse3); } static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return sha1_finup(desc, data, len, out, sha1_transform_ssse3); } static struct shash_alg sha1_ssse3_alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_base_init, .update = sha1_ssse3_update, .finup = sha1_ssse3_finup, .descsize = SHA1_STATE_SIZE, .base = { .cra_name = "sha1", .cra_driver_name = "sha1-ssse3", .cra_priority = 150, .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static int register_sha1_ssse3(void) { if (boot_cpu_has(X86_FEATURE_SSSE3)) return crypto_register_shash(&sha1_ssse3_alg); return 0; } static void unregister_sha1_ssse3(void) { if (boot_cpu_has(X86_FEATURE_SSSE3)) crypto_unregister_shash(&sha1_ssse3_alg); } asmlinkage void sha1_transform_avx(struct sha1_state *state, const u8 *data, int blocks); static int sha1_avx_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha1_update(desc, data, len, sha1_transform_avx); } static int sha1_avx_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return sha1_finup(desc, data, len, out, sha1_transform_avx); } static struct shash_alg sha1_avx_alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_base_init, .update = sha1_avx_update, .finup = sha1_avx_finup, .descsize = SHA1_STATE_SIZE, .base = { .cra_name = "sha1", .cra_driver_name = "sha1-avx", .cra_priority = 160, .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static bool avx_usable(void) { if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { if (boot_cpu_has(X86_FEATURE_AVX)) pr_info("AVX detected but unusable.\n"); return false; } return true; } static int register_sha1_avx(void) { if (avx_usable()) return crypto_register_shash(&sha1_avx_alg); return 0; } static void unregister_sha1_avx(void) { if (avx_usable()) crypto_unregister_shash(&sha1_avx_alg); } #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ asmlinkage void sha1_transform_avx2(struct sha1_state *state, const u8 *data, int blocks); static bool avx2_usable(void) { if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI1) && boot_cpu_has(X86_FEATURE_BMI2)) return true; return false; } static inline void sha1_apply_transform_avx2(struct sha1_state *state, const u8 *data, int blocks) { /* Select the optimal transform based on data block size */ if (blocks >= SHA1_AVX2_BLOCK_OPTSIZE) sha1_transform_avx2(state, data, blocks); else sha1_transform_avx(state, data, blocks); } static int sha1_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha1_update(desc, data, len, sha1_apply_transform_avx2); } static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return 
sha1_finup(desc, data, len, out, sha1_apply_transform_avx2); } static struct shash_alg sha1_avx2_alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_base_init, .update = sha1_avx2_update, .finup = sha1_avx2_finup, .descsize = SHA1_STATE_SIZE, .base = { .cra_name = "sha1", .cra_driver_name = "sha1-avx2", .cra_priority = 170, .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static int register_sha1_avx2(void) { if (avx2_usable()) return crypto_register_shash(&sha1_avx2_alg); return 0; } static void unregister_sha1_avx2(void) { if (avx2_usable()) crypto_unregister_shash(&sha1_avx2_alg); } asmlinkage void sha1_ni_transform(struct sha1_state *digest, const u8 *data, int rounds); static int sha1_ni_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha1_update(desc, data, len, sha1_ni_transform); } static int sha1_ni_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return sha1_finup(desc, data, len, out, sha1_ni_transform); } static struct shash_alg sha1_ni_alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_base_init, .update = sha1_ni_update, .finup = sha1_ni_finup, .descsize = SHA1_STATE_SIZE, .base = { .cra_name = "sha1", .cra_driver_name = "sha1-ni", .cra_priority = 250, .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static int register_sha1_ni(void) { if (boot_cpu_has(X86_FEATURE_SHA_NI)) return crypto_register_shash(&sha1_ni_alg); return 0; } static void unregister_sha1_ni(void) { if (boot_cpu_has(X86_FEATURE_SHA_NI)) crypto_unregister_shash(&sha1_ni_alg); } static int __init sha1_ssse3_mod_init(void) { if (!x86_match_cpu(module_cpu_ids)) return -ENODEV; if (register_sha1_ssse3()) goto fail; if (register_sha1_avx()) { unregister_sha1_ssse3(); goto fail; } if (register_sha1_avx2()) { unregister_sha1_avx(); unregister_sha1_ssse3(); goto fail; } if (register_sha1_ni()) { unregister_sha1_avx2(); unregister_sha1_avx(); unregister_sha1_ssse3(); goto fail; } return 0; fail: return -ENODEV; } static void __exit sha1_ssse3_mod_fini(void) { unregister_sha1_ni(); unregister_sha1_avx2(); unregister_sha1_avx(); unregister_sha1_ssse3(); } module_init(sha1_ssse3_mod_init); module_exit(sha1_ssse3_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated"); MODULE_ALIAS_CRYPTO("sha1"); MODULE_ALIAS_CRYPTO("sha1-ssse3"); MODULE_ALIAS_CRYPTO("sha1-avx"); MODULE_ALIAS_CRYPTO("sha1-avx2"); MODULE_ALIAS_CRYPTO("sha1-ni");
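Each variant above registers a separate "sha1" shash with a distinct cra_priority (150 for SSSE3 up to 250 for SHA-NI), and the crypto API resolves a plain "sha1" request to the highest-priority implementation the running CPU supports. A rough in-kernel sketch of such a caller (a generic synchronous-hash consumer, not code from this file; the function name is illustrative):

#include <crypto/hash.h>
#include <crypto/sha1.h>
#include <linux/err.h>

/* Compute SHA-1 of a buffer using whichever "sha1" shash has the highest
 * priority (e.g. sha1-ni on SHA-NI capable CPUs). The digest buffer must
 * have room for SHA1_DIGEST_SIZE (20) bytes. */
static int example_sha1(const u8 *data, unsigned int len, u8 *digest)
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("sha1", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		/* Stack-allocated descriptor sized for this tfm. */
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		err = crypto_shash_digest(desc, data, len, digest);
	}

	crypto_free_shash(tfm);
	return err;
}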
// SPDX-License-Identifier: GPL-2.0-only /* * VMware VMCI Driver * * Copyright (C) 2012 VMware, Inc. All rights reserved. */ #include <linux/vmw_vmci_defs.h> #include <linux/vmw_vmci_api.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/bug.h> #include "vmci_datagram.h" #include "vmci_resource.h" #include "vmci_context.h" #include "vmci_driver.h" #include "vmci_event.h" #include "vmci_route.h" /* * struct datagram_entry describes the datagram entity. It is used for datagram * entities created only on the host. */ struct datagram_entry { struct vmci_resource resource; u32 flags; bool run_delayed; vmci_datagram_recv_cb recv_cb; void *client_data; u32 priv_flags; }; struct delayed_datagram_info { struct datagram_entry *entry; struct work_struct work; bool in_dg_host_queue; /* msg and msg_payload must be together. */ struct vmci_datagram msg; u8 msg_payload[]; }; /* Number of in-flight host->host datagrams */ static atomic_t delayed_dg_host_queue_size = ATOMIC_INIT(0); /* * Create a datagram entry given a handle pointer.
*/ static int dg_create_handle(u32 resource_id, u32 flags, u32 priv_flags, vmci_datagram_recv_cb recv_cb, void *client_data, struct vmci_handle *out_handle) { int result; u32 context_id; struct vmci_handle handle; struct datagram_entry *entry; if ((flags & VMCI_FLAG_WELLKNOWN_DG_HND) != 0) return VMCI_ERROR_INVALID_ARGS; if ((flags & VMCI_FLAG_ANYCID_DG_HND) != 0) { context_id = VMCI_INVALID_ID; } else { context_id = vmci_get_context_id(); if (context_id == VMCI_INVALID_ID) return VMCI_ERROR_NO_RESOURCES; } handle = vmci_make_handle(context_id, resource_id); entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { pr_warn("Failed allocating memory for datagram entry\n"); return VMCI_ERROR_NO_MEM; } entry->run_delayed = (flags & VMCI_FLAG_DG_DELAYED_CB) ? true : false; entry->flags = flags; entry->recv_cb = recv_cb; entry->client_data = client_data; entry->priv_flags = priv_flags; /* Make datagram resource live. */ result = vmci_resource_add(&entry->resource, VMCI_RESOURCE_TYPE_DATAGRAM, handle); if (result != VMCI_SUCCESS) { pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n", handle.context, handle.resource, result); kfree(entry); return result; } *out_handle = vmci_resource_handle(&entry->resource); return VMCI_SUCCESS; } /* * Internal utility function with the same purpose as * vmci_datagram_get_priv_flags that also takes a context_id. */ static int vmci_datagram_get_priv_flags(u32 context_id, struct vmci_handle handle, u32 *priv_flags) { if (context_id == VMCI_INVALID_ID) return VMCI_ERROR_INVALID_ARGS; if (context_id == VMCI_HOST_CONTEXT_ID) { struct datagram_entry *src_entry; struct vmci_resource *resource; resource = vmci_resource_by_handle(handle, VMCI_RESOURCE_TYPE_DATAGRAM); if (!resource) return VMCI_ERROR_INVALID_ARGS; src_entry = container_of(resource, struct datagram_entry, resource); *priv_flags = src_entry->priv_flags; vmci_resource_put(resource); } else if (context_id == VMCI_HYPERVISOR_CONTEXT_ID) *priv_flags = VMCI_MAX_PRIVILEGE_FLAGS; else *priv_flags = vmci_context_get_priv_flags(context_id); return VMCI_SUCCESS; } /* * Calls the specified callback in a delayed context. */ static void dg_delayed_dispatch(struct work_struct *work) { struct delayed_datagram_info *dg_info = container_of(work, struct delayed_datagram_info, work); dg_info->entry->recv_cb(dg_info->entry->client_data, &dg_info->msg); vmci_resource_put(&dg_info->entry->resource); if (dg_info->in_dg_host_queue) atomic_dec(&delayed_dg_host_queue_size); kfree(dg_info); } /* * Dispatch datagram as a host, to the host, or other vm context. This * function cannot dispatch to hypervisor context handlers. This should * have been handled before we get here by vmci_datagram_dispatch. * Returns number of bytes sent on success, error code otherwise. */ static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg) { int retval; size_t dg_size; u32 src_priv_flags; dg_size = VMCI_DG_SIZE(dg); /* Host cannot send to the hypervisor. */ if (dg->dst.context == VMCI_HYPERVISOR_CONTEXT_ID) return VMCI_ERROR_DST_UNREACHABLE; /* Check that source handle matches sending context. */ if (dg->src.context != context_id) { pr_devel("Sender context (ID=0x%x) is not owner of src datagram entry (handle=0x%x:0x%x)\n", context_id, dg->src.context, dg->src.resource); return VMCI_ERROR_NO_ACCESS; } /* Get hold of privileges of sending endpoint. 
*/ retval = vmci_datagram_get_priv_flags(context_id, dg->src, &src_priv_flags); if (retval != VMCI_SUCCESS) { pr_warn("Couldn't get privileges (handle=0x%x:0x%x)\n", dg->src.context, dg->src.resource); return retval; } /* Determine if we should route to host or guest destination. */ if (dg->dst.context == VMCI_HOST_CONTEXT_ID) { /* Route to host datagram entry. */ struct datagram_entry *dst_entry; struct vmci_resource *resource; if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID && dg->dst.resource == VMCI_EVENT_HANDLER) { return vmci_event_dispatch(dg); } resource = vmci_resource_by_handle(dg->dst, VMCI_RESOURCE_TYPE_DATAGRAM); if (!resource) { pr_devel("Sending to invalid destination (handle=0x%x:0x%x)\n", dg->dst.context, dg->dst.resource); return VMCI_ERROR_INVALID_RESOURCE; } dst_entry = container_of(resource, struct datagram_entry, resource); if (vmci_deny_interaction(src_priv_flags, dst_entry->priv_flags)) { vmci_resource_put(resource); return VMCI_ERROR_NO_ACCESS; } /* * If a VMCI datagram destined for the host is also sent by the * host, we always run it delayed. This ensures that no locks * are held when the datagram callback runs. */ if (dst_entry->run_delayed || dg->src.context == VMCI_HOST_CONTEXT_ID) { struct delayed_datagram_info *dg_info; if (atomic_add_return(1, &delayed_dg_host_queue_size) == VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE) { atomic_dec(&delayed_dg_host_queue_size); vmci_resource_put(resource); return VMCI_ERROR_NO_MEM; } dg_info = kmalloc(struct_size(dg_info, msg_payload, dg->payload_size), GFP_ATOMIC); if (!dg_info) { atomic_dec(&delayed_dg_host_queue_size); vmci_resource_put(resource); return VMCI_ERROR_NO_MEM; } dg_info->in_dg_host_queue = true; dg_info->entry = dst_entry; dg_info->msg = *dg; memcpy(&dg_info->msg_payload, dg + 1, dg->payload_size); INIT_WORK(&dg_info->work, dg_delayed_dispatch); schedule_work(&dg_info->work); retval = VMCI_SUCCESS; } else { retval = dst_entry->recv_cb(dst_entry->client_data, dg); vmci_resource_put(resource); if (retval < VMCI_SUCCESS) return retval; } } else { /* Route to destination VM context. */ struct vmci_datagram *new_dg; if (context_id != dg->dst.context) { if (vmci_deny_interaction(src_priv_flags, vmci_context_get_priv_flags (dg->dst.context))) { return VMCI_ERROR_NO_ACCESS; } else if (VMCI_CONTEXT_IS_VM(context_id)) { /* * If the sending context is a VM, it * cannot reach another VM. */ pr_devel("Datagram communication between VMs not supported (src=0x%x, dst=0x%x)\n", context_id, dg->dst.context); return VMCI_ERROR_DST_UNREACHABLE; } } /* We make a copy to enqueue. */ new_dg = kmemdup(dg, dg_size, GFP_KERNEL); if (new_dg == NULL) return VMCI_ERROR_NO_MEM; retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg); if (retval < VMCI_SUCCESS) { kfree(new_dg); return retval; } } /* * We currently truncate the size to signed 32 bits. This doesn't * matter for this handler as it only support 4Kb messages. */ return (int)dg_size; } /* * Dispatch datagram as a guest, down through the VMX and potentially to * the host. * Returns number of bytes sent on success, error code otherwise. */ static int dg_dispatch_as_guest(struct vmci_datagram *dg) { int retval; struct vmci_resource *resource; resource = vmci_resource_by_handle(dg->src, VMCI_RESOURCE_TYPE_DATAGRAM); if (!resource) return VMCI_ERROR_NO_HANDLE; retval = vmci_send_datagram(dg); vmci_resource_put(resource); return retval; } /* * Dispatch datagram. This will determine the routing for the datagram * and dispatch it accordingly. 
* Returns number of bytes sent on success, error code otherwise. */ int vmci_datagram_dispatch(u32 context_id, struct vmci_datagram *dg, bool from_guest) { int retval; enum vmci_route route; BUILD_BUG_ON(sizeof(struct vmci_datagram) != 24); if (dg->payload_size > VMCI_MAX_DG_SIZE || VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) { pr_devel("Payload (size=%llu bytes) too big to send\n", (unsigned long long)dg->payload_size); return VMCI_ERROR_INVALID_ARGS; } retval = vmci_route(&dg->src, &dg->dst, from_guest, &route); if (retval < VMCI_SUCCESS) { pr_devel("Failed to route datagram (src=0x%x, dst=0x%x, err=%d)\n", dg->src.context, dg->dst.context, retval); return retval; } if (VMCI_ROUTE_AS_HOST == route) { if (VMCI_INVALID_ID == context_id) context_id = VMCI_HOST_CONTEXT_ID; return dg_dispatch_as_host(context_id, dg); } if (VMCI_ROUTE_AS_GUEST == route) return dg_dispatch_as_guest(dg); pr_warn("Unknown route (%d) for datagram\n", route); return VMCI_ERROR_DST_UNREACHABLE; } /* * Invoke the handler for the given datagram. This is intended to be * called only when acting as a guest and receiving a datagram from the * virtual device. */ int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg) { struct vmci_resource *resource; struct datagram_entry *dst_entry; resource = vmci_resource_by_handle(dg->dst, VMCI_RESOURCE_TYPE_DATAGRAM); if (!resource) { pr_devel("destination (handle=0x%x:0x%x) doesn't exist\n", dg->dst.context, dg->dst.resource); return VMCI_ERROR_NO_HANDLE; } dst_entry = container_of(resource, struct datagram_entry, resource); if (dst_entry->run_delayed) { struct delayed_datagram_info *dg_info; dg_info = kmalloc(sizeof(*dg_info) + (size_t)dg->payload_size, GFP_ATOMIC); if (!dg_info) { vmci_resource_put(resource); return VMCI_ERROR_NO_MEM; } dg_info->in_dg_host_queue = false; dg_info->entry = dst_entry; dg_info->msg = *dg; memcpy(&dg_info->msg_payload, dg + 1, dg->payload_size); INIT_WORK(&dg_info->work, dg_delayed_dispatch); schedule_work(&dg_info->work); } else { dst_entry->recv_cb(dst_entry->client_data, dg); vmci_resource_put(resource); } return VMCI_SUCCESS; } /* * vmci_datagram_create_handle_priv() - Create host context datagram endpoint * @resource_id: The resource ID. * @flags: Datagram Flags. * @priv_flags: Privilege Flags. * @recv_cb: Callback when receiving datagrams. * @client_data: Pointer for a datagram_entry struct * @out_handle: vmci_handle that is populated as a result of this function. * * Creates a host context datagram endpoint and returns a handle to it. */ int vmci_datagram_create_handle_priv(u32 resource_id, u32 flags, u32 priv_flags, vmci_datagram_recv_cb recv_cb, void *client_data, struct vmci_handle *out_handle) { if (out_handle == NULL) return VMCI_ERROR_INVALID_ARGS; if (recv_cb == NULL) { pr_devel("Client callback needed when creating datagram\n"); return VMCI_ERROR_INVALID_ARGS; } if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) return VMCI_ERROR_INVALID_ARGS; return dg_create_handle(resource_id, flags, priv_flags, recv_cb, client_data, out_handle); } EXPORT_SYMBOL_GPL(vmci_datagram_create_handle_priv); /* * vmci_datagram_create_handle() - Create host context datagram endpoint * @resource_id: Resource ID. * @flags: Datagram Flags. * @recv_cb: Callback when receiving datagrams. * @client_ata: Pointer for a datagram_entry struct * @out_handle: vmci_handle that is populated as a result of this function. * * Creates a host context datagram endpoint and returns a handle to * it. Same as vmci_datagram_create_handle_priv without the priviledge * flags argument. 
*/ int vmci_datagram_create_handle(u32 resource_id, u32 flags, vmci_datagram_recv_cb recv_cb, void *client_data, struct vmci_handle *out_handle) { return vmci_datagram_create_handle_priv( resource_id, flags, VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS, recv_cb, client_data, out_handle); } EXPORT_SYMBOL_GPL(vmci_datagram_create_handle); /* * vmci_datagram_destroy_handle() - Destroys datagram handle * @handle: vmci_handle to be destroyed and reaped. * * Use this function to destroy any datagram handles created by * vmci_datagram_create_handle{,Priv} functions. */ int vmci_datagram_destroy_handle(struct vmci_handle handle) { struct datagram_entry *entry; struct vmci_resource *resource; resource = vmci_resource_by_handle(handle, VMCI_RESOURCE_TYPE_DATAGRAM); if (!resource) { pr_devel("Failed to destroy datagram (handle=0x%x:0x%x)\n", handle.context, handle.resource); return VMCI_ERROR_NOT_FOUND; } entry = container_of(resource, struct datagram_entry, resource); vmci_resource_put(&entry->resource); vmci_resource_remove(&entry->resource); kfree(entry); return VMCI_SUCCESS; } EXPORT_SYMBOL_GPL(vmci_datagram_destroy_handle); /* * vmci_datagram_send() - Send a datagram * @msg: The datagram to send. * * Sends the provided datagram on its merry way. */ int vmci_datagram_send(struct vmci_datagram *msg) { if (msg == NULL) return VMCI_ERROR_INVALID_ARGS; return vmci_datagram_dispatch(VMCI_INVALID_ID, msg, false); } EXPORT_SYMBOL_GPL(vmci_datagram_send);
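A hedged usage sketch for the exported interface above: a host-side client creates a datagram endpoint with a receive callback, sends a datagram, and tears the handle down. The resource ID, payload size, and my_* names are hypothetical, and the destination handle would normally identify a peer endpoint learned out of band; this is illustrative only, not part of the driver above.

#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/printk.h>
#include <linux/string.h>

/* Hypothetical receive callback; returning >= VMCI_SUCCESS consumes the datagram. */
static int my_recv_cb(void *client_data, struct vmci_datagram *dg)
{
	pr_info("VMCI datagram received, payload_size=%llu\n",
		(unsigned long long)dg->payload_size);
	return VMCI_SUCCESS;
}

static int my_vmci_dg_example(struct vmci_handle peer)
{
	struct {
		struct vmci_datagram dg;
		u8 payload[4];
	} msg;
	struct vmci_handle handle;
	int ret;

	ret = vmci_datagram_create_handle(42 /* resource_id */, 0 /* flags */,
					  my_recv_cb, NULL, &handle);
	if (ret != VMCI_SUCCESS)
		return ret;

	memset(&msg, 0, sizeof(msg));
	msg.dg.src = handle;		/* must match the sending context */
	msg.dg.dst = peer;		/* endpoint registered by the peer */
	msg.dg.payload_size = sizeof(msg.payload);

	ret = vmci_datagram_send(&msg.dg);	/* bytes sent, or error < 0 */

	vmci_datagram_destroy_handle(handle);
	return ret < VMCI_SUCCESS ? ret : 0;
}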
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> * * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT * funded by Astaro. */ #include <linux/module.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <net/netfilter/nf_nat.h> struct ip6table_nat_pernet { struct nf_hook_ops *nf_nat_ops; }; static unsigned int ip6table_nat_net_id __read_mostly; static const struct xt_table nf_nat_ipv6_table = { .name = "nat", .valid_hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, .af = NFPROTO_IPV6, }; static const struct nf_hook_ops nf_nat_ipv6_ops[] = { { .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_NAT_DST, }, { .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_NAT_SRC, }, { .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST, }, { .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC, }, }; static int ip6t_nat_register_lookups(struct net *net) { struct ip6table_nat_pernet *xt_nat_net; struct nf_hook_ops *ops; struct xt_table *table; int i, ret; table = xt_find_table(net, NFPROTO_IPV6, "nat"); if (WARN_ON_ONCE(!table)) return -ENOENT; xt_nat_net = net_generic(net, ip6table_nat_net_id); ops = kmemdup(nf_nat_ipv6_ops, sizeof(nf_nat_ipv6_ops), GFP_KERNEL); if (!ops) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) { ops[i].priv = table; ret = nf_nat_ipv6_register_fn(net, &ops[i]); if (ret) { while (i) nf_nat_ipv6_unregister_fn(net, &ops[--i]); kfree(ops); return ret; } } xt_nat_net->nf_nat_ops = ops; return 0; } static void ip6t_nat_unregister_lookups(struct net *net) { struct ip6table_nat_pernet *xt_nat_net = net_generic(net, ip6table_nat_net_id); struct nf_hook_ops *ops = xt_nat_net->nf_nat_ops; int i; if (!ops) return; for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) nf_nat_ipv6_unregister_fn(net, &ops[i]); kfree(ops); } static int ip6table_nat_table_init(struct net *net) { struct ip6t_replace *repl; int ret; repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); if (repl == NULL) return -ENOMEM; ret = ip6t_register_table(net, &nf_nat_ipv6_table, repl, NULL); if (ret < 0) { kfree(repl); return ret; } ret = ip6t_nat_register_lookups(net); if (ret < 0) ip6t_unregister_table_exit(net, "nat"); kfree(repl); return ret; } static void __net_exit ip6table_nat_net_pre_exit(struct net *net) { ip6t_nat_unregister_lookups(net); } static void __net_exit ip6table_nat_net_exit(struct net *net) { ip6t_unregister_table_exit(net, "nat"); } static struct pernet_operations ip6table_nat_net_ops = { .pre_exit = ip6table_nat_net_pre_exit, .exit = 
ip6table_nat_net_exit, .id = &ip6table_nat_net_id, .size = sizeof(struct ip6table_nat_pernet), }; static int __init ip6table_nat_init(void) { int ret; /* net->gen->ptr[ip6table_nat_net_id] must be allocated * before calling ip6t_nat_register_lookups(). */ ret = register_pernet_subsys(&ip6table_nat_net_ops); if (ret < 0) return ret; ret = xt_register_template(&nf_nat_ipv6_table, ip6table_nat_table_init); if (ret) unregister_pernet_subsys(&ip6table_nat_net_ops); return ret; } static void __exit ip6table_nat_exit(void) { xt_unregister_template(&nf_nat_ipv6_table); unregister_pernet_subsys(&ip6table_nat_net_ops); } module_init(ip6table_nat_init); module_exit(ip6table_nat_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Ip6tables legacy nat table");
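The table above keeps its duplicated nf_hook_ops array in per-namespace storage that is reserved through pernet_operations .id/.size and fetched with net_generic(). The fragment below is a minimal, self-contained sketch of that same pattern under hypothetical my_* names; it is illustrative only and not part of ip6table_nat.

#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

struct my_pernet {
	int counter;		/* per-netns private state */
};

static unsigned int my_net_id __read_mostly;

static int __net_init my_net_init(struct net *net)
{
	/* Storage of .size bytes is allocated per netns and zeroed for us. */
	struct my_pernet *p = net_generic(net, my_net_id);

	p->counter = 0;
	return 0;
}

static struct pernet_operations my_net_ops = {
	.init = my_net_init,
	.id   = &my_net_id,
	.size = sizeof(struct my_pernet),
};

static int __init my_init(void)
{
	return register_pernet_subsys(&my_net_ops);
}

static void __exit my_exit(void)
{
	unregister_pernet_subsys(&my_net_ops);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");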
// SPDX-License-Identifier: GPL-2.0-or-later /* * Checksum updating actions * * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org> */ #include <linux/types.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/netlink.h> #include <net/netlink.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/icmp.h> #include <linux/icmpv6.h> #include <linux/igmp.h> #include <net/tcp.h> #include <net/udp.h> #include <net/ip6_checksum.h> #include <net/sctp/checksum.h> #include <net/act_api.h> #include <net/pkt_cls.h> 
#include <linux/tc_act/tc_csum.h> #include <net/tc_act/tc_csum.h> #include <net/tc_wrapper.h> static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, }; static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_csum_params *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_csum *parm; struct tcf_csum *p; int ret = 0, err; u32 index; if (nla == NULL) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_CSUM_MAX, nla, csum_policy, NULL); if (err < 0) return err; if (tb[TCA_CSUM_PARMS] == NULL) return -EINVAL; parm = nla_data(tb[TCA_CSUM_PARMS]); index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { ret = tcf_idr_create_from_flags(tn, index, est, a, &act_csum_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; } else if (err > 0) { if (bind) /* dont override defaults */ return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } } else { return err; } err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; p = to_tcf_csum(*a); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { err = -ENOMEM; goto put_chain; } params_new->update_flags = parm->update_flags; spin_lock_bh(&p->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); params_new = rcu_replace_pointer(p->params, params_new, lockdep_is_held(&p->tcf_lock)); spin_unlock_bh(&p->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); if (params_new) kfree_rcu(params_new, rcu); return ret; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*a, bind); return err; } /** * tcf_csum_skb_nextlayer - Get next layer pointer * @skb: sk_buff to use * @ihl: previous summed headers length * @ipl: complete packet length * @jhl: next header length * * Check the expected next layer availability in the specified sk_buff. * Return the next layer pointer if pass, NULL otherwise. 
*/ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, unsigned int ihl, unsigned int ipl, unsigned int jhl) { int ntkoff = skb_network_offset(skb); int hl = ihl + jhl; if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) || skb_try_make_writable(skb, hl + ntkoff)) return NULL; else return (void *)(skb_network_header(skb) + ihl); } static int tcf_csum_ipv4_icmp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct icmphdr *icmph; icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph)); if (icmph == NULL) return 0; icmph->checksum = 0; skb->csum = csum_partial(icmph, ipl - ihl, 0); icmph->checksum = csum_fold(skb->csum); skb->ip_summed = CHECKSUM_NONE; return 1; } static int tcf_csum_ipv4_igmp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct igmphdr *igmph; igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph)); if (igmph == NULL) return 0; igmph->csum = 0; skb->csum = csum_partial(igmph, ipl - ihl, 0); igmph->csum = csum_fold(skb->csum); skb->ip_summed = CHECKSUM_NONE; return 1; } static int tcf_csum_ipv6_icmp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct icmp6hdr *icmp6h; const struct ipv6hdr *ip6h; icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h)); if (icmp6h == NULL) return 0; ip6h = ipv6_hdr(skb); icmp6h->icmp6_cksum = 0; skb->csum = csum_partial(icmp6h, ipl - ihl, 0); icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ipl - ihl, IPPROTO_ICMPV6, skb->csum); skb->ip_summed = CHECKSUM_NONE; return 1; } static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct tcphdr *tcph; const struct iphdr *iph; if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) return 1; tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; iph = ip_hdr(skb); tcph->check = 0; skb->csum = csum_partial(tcph, ipl - ihl, 0); tcph->check = tcp_v4_check(ipl - ihl, iph->saddr, iph->daddr, skb->csum); skb->ip_summed = CHECKSUM_NONE; return 1; } static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct tcphdr *tcph; const struct ipv6hdr *ip6h; if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) return 1; tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; ip6h = ipv6_hdr(skb); tcph->check = 0; skb->csum = csum_partial(tcph, ipl - ihl, 0); tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ipl - ihl, IPPROTO_TCP, skb->csum); skb->ip_summed = CHECKSUM_NONE; return 1; } static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl, int udplite) { struct udphdr *udph; const struct iphdr *iph; u16 ul; if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) return 1; /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, * UDPLITE uses udph->len for another thing, * Use iph->tot_len, or just ipl. 
*/ udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); if (udph == NULL) return 0; iph = ip_hdr(skb); ul = ntohs(udph->len); if (udplite || udph->check) { udph->check = 0; if (udplite) { if (ul == 0) skb->csum = csum_partial(udph, ipl - ihl, 0); else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) skb->csum = csum_partial(udph, ul, 0); else goto ignore_obscure_skb; } else { if (ul != ipl - ihl) goto ignore_obscure_skb; skb->csum = csum_partial(udph, ul, 0); } udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, ul, iph->protocol, skb->csum); if (!udph->check) udph->check = CSUM_MANGLED_0; } skb->ip_summed = CHECKSUM_NONE; ignore_obscure_skb: return 1; } static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl, int udplite) { struct udphdr *udph; const struct ipv6hdr *ip6h; u16 ul; if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) return 1; /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, * UDPLITE uses udph->len for another thing, * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl. */ udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); if (udph == NULL) return 0; ip6h = ipv6_hdr(skb); ul = ntohs(udph->len); udph->check = 0; if (udplite) { if (ul == 0) skb->csum = csum_partial(udph, ipl - ihl, 0); else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) skb->csum = csum_partial(udph, ul, 0); else goto ignore_obscure_skb; } else { if (ul != ipl - ihl) goto ignore_obscure_skb; skb->csum = csum_partial(udph, ul, 0); } udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul, udplite ? IPPROTO_UDPLITE : IPPROTO_UDP, skb->csum); if (!udph->check) udph->check = CSUM_MANGLED_0; skb->ip_summed = CHECKSUM_NONE; ignore_obscure_skb: return 1; } static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct sctphdr *sctph; if (skb_is_gso(skb) && skb_is_gso_sctp(skb)) return 1; sctph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*sctph)); if (!sctph) return 0; sctph->checksum = sctp_compute_cksum(skb, skb_network_offset(skb) + ihl); skb_reset_csum_not_inet(skb); return 1; } static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) { const struct iphdr *iph; int ntkoff; ntkoff = skb_network_offset(skb); if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff)) goto fail; iph = ip_hdr(skb); switch (iph->frag_off & htons(IP_OFFSET) ? 
0 : iph->protocol) { case IPPROTO_ICMP: if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) if (!tcf_csum_ipv4_icmp(skb, iph->ihl * 4, ntohs(iph->tot_len))) goto fail; break; case IPPROTO_IGMP: if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP) if (!tcf_csum_ipv4_igmp(skb, iph->ihl * 4, ntohs(iph->tot_len))) goto fail; break; case IPPROTO_TCP: if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) if (!tcf_csum_ipv4_tcp(skb, iph->ihl * 4, ntohs(iph->tot_len))) goto fail; break; case IPPROTO_UDP: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4, ntohs(iph->tot_len), 0)) goto fail; break; case IPPROTO_UDPLITE: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4, ntohs(iph->tot_len), 1)) goto fail; break; case IPPROTO_SCTP: if ((update_flags & TCA_CSUM_UPDATE_FLAG_SCTP) && !tcf_csum_sctp(skb, iph->ihl * 4, ntohs(iph->tot_len))) goto fail; break; } if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { if (skb_try_make_writable(skb, sizeof(*iph) + ntkoff)) goto fail; ip_send_check(ip_hdr(skb)); } return 1; fail: return 0; } static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, unsigned int ixhl, unsigned int *pl) { int off, len, optlen; unsigned char *xh = (void *)ip6xh; off = sizeof(*ip6xh); len = ixhl - off; while (len > 1) { switch (xh[off]) { case IPV6_TLV_PAD1: optlen = 1; break; case IPV6_TLV_JUMBO: optlen = xh[off + 1] + 2; if (optlen != 6 || len < 6 || (off & 3) != 2) /* wrong jumbo option length/alignment */ return 0; *pl = ntohl(*(__be32 *)(xh + off + 2)); goto done; default: optlen = xh[off + 1] + 2; if (optlen > len) /* ignore obscure options */ goto done; break; } off += optlen; len -= optlen; } done: return 1; } static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) { struct ipv6hdr *ip6h; struct ipv6_opt_hdr *ip6xh; unsigned int hl, ixhl; unsigned int pl; int ntkoff; u8 nexthdr; ntkoff = skb_network_offset(skb); hl = sizeof(*ip6h); if (!pskb_may_pull(skb, hl + ntkoff)) goto fail; ip6h = ipv6_hdr(skb); pl = ntohs(ip6h->payload_len); nexthdr = ip6h->nexthdr; do { switch (nexthdr) { case NEXTHDR_FRAGMENT: goto ignore_skb; case NEXTHDR_ROUTING: case NEXTHDR_HOP: case NEXTHDR_DEST: if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff)) goto fail; ip6xh = (void *)(skb_network_header(skb) + hl); ixhl = ipv6_optlen(ip6xh); if (!pskb_may_pull(skb, hl + ixhl + ntkoff)) goto fail; ip6xh = (void *)(skb_network_header(skb) + hl); if ((nexthdr == NEXTHDR_HOP) && !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl))) goto fail; nexthdr = ip6xh->nexthdr; hl += ixhl; break; case IPPROTO_ICMPV6: if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) if (!tcf_csum_ipv6_icmp(skb, hl, pl + sizeof(*ip6h))) goto fail; goto done; case IPPROTO_TCP: if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) if (!tcf_csum_ipv6_tcp(skb, hl, pl + sizeof(*ip6h))) goto fail; goto done; case IPPROTO_UDP: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) if (!tcf_csum_ipv6_udp(skb, hl, pl + sizeof(*ip6h), 0)) goto fail; goto done; case IPPROTO_UDPLITE: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) if (!tcf_csum_ipv6_udp(skb, hl, pl + sizeof(*ip6h), 1)) goto fail; goto done; case IPPROTO_SCTP: if ((update_flags & TCA_CSUM_UPDATE_FLAG_SCTP) && !tcf_csum_sctp(skb, hl, pl + sizeof(*ip6h))) goto fail; goto done; default: goto ignore_skb; } } while (pskb_may_pull(skb, hl + 1 + ntkoff)); done: ignore_skb: return 1; fail: return 0; } TC_INDIRECT_SCOPE int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); bool 
orig_vlan_tag_present = false; unsigned int vlan_hdr_count = 0; struct tcf_csum_params *params; u32 update_flags; __be16 protocol; int action; params = rcu_dereference_bh(p->params); tcf_lastuse_update(&p->tcf_tm); tcf_action_update_bstats(&p->common, skb); action = READ_ONCE(p->tcf_action); if (unlikely(action == TC_ACT_SHOT)) goto drop; update_flags = params->update_flags; protocol = skb_protocol(skb, false); again: switch (protocol) { case cpu_to_be16(ETH_P_IP): if (!tcf_csum_ipv4(skb, update_flags)) goto drop; break; case cpu_to_be16(ETH_P_IPV6): if (!tcf_csum_ipv6(skb, update_flags)) goto drop; break; case cpu_to_be16(ETH_P_8021AD): fallthrough; case cpu_to_be16(ETH_P_8021Q): if (skb_vlan_tag_present(skb) && !orig_vlan_tag_present) { protocol = skb->protocol; orig_vlan_tag_present = true; } else { struct vlan_hdr *vlan = (struct vlan_hdr *)skb->data; protocol = vlan->h_vlan_encapsulated_proto; skb_pull(skb, VLAN_HLEN); skb_reset_network_header(skb); vlan_hdr_count++; } goto again; } out: /* Restore the skb for the pulled VLAN tags */ while (vlan_hdr_count--) { skb_push(skb, VLAN_HLEN); skb_reset_network_header(skb); } return action; drop: tcf_action_inc_drop_qstats(&p->common); action = TC_ACT_SHOT; goto out; } static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_csum *p = to_tcf_csum(a); struct tcf_csum_params *params; struct tc_csum opt = { .index = p->tcf_index, .refcnt = refcount_read(&p->tcf_refcnt) - ref, .bindcnt = atomic_read(&p->tcf_bindcnt) - bind, }; struct tcf_t t; spin_lock_bh(&p->tcf_lock); params = rcu_dereference_protected(p->params, lockdep_is_held(&p->tcf_lock)); opt.action = p->tcf_action; opt.update_flags = params->update_flags; if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt)) goto nla_put_failure; tcf_tm_dump(&t, &p->tcf_tm); if (nla_put_64bit(skb, TCA_CSUM_TM, sizeof(t), &t, TCA_CSUM_PAD)) goto nla_put_failure; spin_unlock_bh(&p->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&p->tcf_lock); nlmsg_trim(skb, b); return -1; } static void tcf_csum_cleanup(struct tc_action *a) { struct tcf_csum *p = to_tcf_csum(a); struct tcf_csum_params *params; params = rcu_dereference_protected(p->params, 1); if (params) kfree_rcu(params, rcu); } static size_t tcf_csum_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_csum)); } static int tcf_csum_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, bool bind, struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; entry->id = FLOW_ACTION_CSUM; entry->csum_flags = tcf_csum_update_flags(act); *index_inc = 1; } else { struct flow_offload_action *fl_action = entry_data; fl_action->id = FLOW_ACTION_CSUM; } return 0; } static struct tc_action_ops act_csum_ops = { .kind = "csum", .id = TCA_ID_CSUM, .owner = THIS_MODULE, .act = tcf_csum_act, .dump = tcf_csum_dump, .init = tcf_csum_init, .cleanup = tcf_csum_cleanup, .get_fill_size = tcf_csum_get_fill_size, .offload_act_setup = tcf_csum_offload_act_setup, .size = sizeof(struct tcf_csum), }; MODULE_ALIAS_NET_ACT("csum"); static __net_init int csum_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id); return tc_action_net_init(net, tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_csum_ops.net_id); } static struct pernet_operations csum_net_ops = { .init = csum_init_net, .exit_batch = 
csum_exit_net, .id = &act_csum_ops.net_id, .size = sizeof(struct tc_action_net), }; MODULE_DESCRIPTION("Checksum updating actions"); MODULE_LICENSE("GPL"); static int __init csum_init_module(void) { return tcf_register_action(&act_csum_ops, &csum_net_ops); } static void __exit csum_cleanup_module(void) { tcf_unregister_action(&act_csum_ops, &csum_net_ops); } module_init(csum_init_module); module_exit(csum_cleanup_module);
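Each tcf_csum_ipv4_*/ipv6_* helper above follows the same recipe: make the header writable, zero the checksum field, run csum_partial() over the covered bytes, then fold the result (and, for TCP/UDP, mix in the pseudo-header via csum_tcpudp_magic() or csum_ipv6_magic()). Below is a hedged, minimal sketch of that recipe for a plain ICMP header-plus-payload buffer; recompute_icmp_csum is a hypothetical helper, not part of act_csum.

#include <linux/icmp.h>
#include <net/checksum.h>

/* Hypothetical helper: len covers the ICMP header plus its payload. */
static void recompute_icmp_csum(struct icmphdr *icmph, unsigned int len)
{
	/* Zero the field first so it does not contribute to the sum. */
	icmph->checksum = 0;
	icmph->checksum = csum_fold(csum_partial(icmph, len, 0));
}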
/* Key type used to cache DNS lookups made by the kernel * * See Documentation/networking/dns_resolver.rst * * Copyright (c) 2007 Igor Mammedov * Author(s): Igor Mammedov (niallain@gmail.com) * Steve French (sfrench@us.ibm.com) * Wang Lei (wang840925@gmail.com) * David Howells (dhowells@redhat.com) * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation; either version 2.1 of the License, or * (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/keyctl.h> #include <linux/err.h> #include <linux/seq_file.h> #include <linux/dns_resolver.h> #include <keys/dns_resolver-type.h> #include <keys/user-type.h> #include "internal.h" MODULE_DESCRIPTION("DNS Resolver"); MODULE_AUTHOR("Wang Lei"); MODULE_LICENSE("GPL"); unsigned int dns_resolver_debug; module_param_named(debug, dns_resolver_debug, uint, 0644); MODULE_PARM_DESC(debug, "DNS Resolver debugging mask"); const struct cred *dns_resolver_cache; #define DNS_ERRORNO_OPTION "dnserror" /* * Preparse instantiation data for a dns_resolver key. * * For normal hostname lookups, the data must be a NUL-terminated string, with * the NUL char accounted in datalen. * * If the data contains a '#' character, then we take the clause after each * one to be an option of the form 'key=value'. The actual data of interest is * the string leading up to the first '#'.
For instance: * * "ip1,ip2,...#foo=bar" * * For server list requests, the data must begin with a NUL char and be * followed by a byte indicating the version of the data format. Version 1 * looks something like (note this is packed): * * u8 Non-string marker (ie. 0) * u8 Content (DNS_PAYLOAD_IS_*) * u8 Version (e.g. 1) * u8 Source of server list * u8 Lookup status of server list * u8 Number of servers * foreach-server { * __le16 Name length * __le16 Priority (as per SRV record, low first) * __le16 Weight (as per SRV record, higher first) * __le16 Port * u8 Source of address list * u8 Lookup status of address list * u8 Protocol (DNS_SERVER_PROTOCOL_*) * u8 Number of addresses * char[] Name (not NUL-terminated) * foreach-address { * u8 Family (DNS_ADDRESS_IS_*) * union { * u8[4] ipv4_addr * u8[16] ipv6_addr * } * } * } * */ static int dns_resolver_preparse(struct key_preparsed_payload *prep) { struct user_key_payload *upayload; unsigned long derrno; int ret; int datalen = prep->datalen, result_len = 0; const char *data = prep->data, *end, *opt; if (datalen <= 1 || !data) return -EINVAL; if (data[0] == 0) { const struct dns_server_list_v1_header *v1; /* It may be a server list. */ if (datalen < sizeof(*v1)) return -EINVAL; v1 = (const struct dns_server_list_v1_header *)data; kenter("[%u,%u],%u", v1->hdr.content, v1->hdr.version, datalen); if (v1->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST) { pr_warn_ratelimited( "dns_resolver: Unsupported content type (%u)\n", v1->hdr.content); return -EINVAL; } if (v1->hdr.version != 1) { pr_warn_ratelimited( "dns_resolver: Unsupported server list version (%u)\n", v1->hdr.version); return -EINVAL; } if ((v1->status != DNS_LOOKUP_GOOD && v1->status != DNS_LOOKUP_GOOD_WITH_BAD)) { if (prep->expiry == TIME64_MAX) prep->expiry = ktime_get_real_seconds() + 1; } result_len = datalen; goto store_result; } kenter("'%*.*s',%u", datalen, datalen, data, datalen); if (!data || data[datalen - 1] != '\0') return -EINVAL; datalen--; /* deal with any options embedded in the data */ end = data + datalen; opt = memchr(data, '#', datalen); if (!opt) { /* no options: the entire data is the result */ kdebug("no options"); result_len = datalen; } else { const char *next_opt; result_len = opt - data; opt++; kdebug("options: '%s'", opt); do { int opt_len, opt_nlen; const char *eq; char optval[128]; next_opt = memchr(opt, '#', end - opt) ?: end; opt_len = next_opt - opt; if (opt_len <= 0 || opt_len > sizeof(optval)) { pr_warn_ratelimited("Invalid option length (%d) for dns_resolver key\n", opt_len); return -EINVAL; } eq = memchr(opt, '=', opt_len); if (eq) { opt_nlen = eq - opt; eq++; memcpy(optval, eq, next_opt - eq); optval[next_opt - eq] = '\0'; } else { opt_nlen = opt_len; optval[0] = '\0'; } kdebug("option '%*.*s' val '%s'", opt_nlen, opt_nlen, opt, optval); /* see if it's an error number representing a DNS error * that's to be recorded as the result in this key */ if (opt_nlen == sizeof(DNS_ERRORNO_OPTION) - 1 && memcmp(opt, DNS_ERRORNO_OPTION, opt_nlen) == 0) { kdebug("dns error number option"); ret = kstrtoul(optval, 10, &derrno); if (ret < 0) goto bad_option_value; if (derrno < 1 || derrno > 511) goto bad_option_value; kdebug("dns error no. 
= %lu", derrno); prep->payload.data[dns_key_error] = ERR_PTR(-derrno); continue; } bad_option_value: pr_warn_ratelimited("Option '%*.*s' to dns_resolver key: bad/missing value\n", opt_nlen, opt_nlen, opt); return -EINVAL; } while (opt = next_opt + 1, opt < end); } /* don't cache the result if we're caching an error saying there's no * result */ if (prep->payload.data[dns_key_error]) { kleave(" = 0 [h_error %ld]", PTR_ERR(prep->payload.data[dns_key_error])); return 0; } store_result: kdebug("store result"); prep->quotalen = result_len; upayload = kmalloc(sizeof(*upayload) + result_len + 1, GFP_KERNEL); if (!upayload) { kleave(" = -ENOMEM"); return -ENOMEM; } upayload->datalen = result_len; memcpy(upayload->data, data, result_len); upayload->data[result_len] = '\0'; prep->payload.data[dns_key_data] = upayload; kleave(" = 0"); return 0; } /* * Clean up the preparse data */ static void dns_resolver_free_preparse(struct key_preparsed_payload *prep) { pr_devel("==>%s()\n", __func__); kfree(prep->payload.data[dns_key_data]); } /* * The description is of the form "[<type>:]<domain_name>" * * The domain name may be a simple name or an absolute domain name (which * should end with a period). The domain name is case-independent. */ static bool dns_resolver_cmp(const struct key *key, const struct key_match_data *match_data) { int slen, dlen, ret = 0; const char *src = key->description, *dsp = match_data->raw_data; kenter("%s,%s", src, dsp); if (!src || !dsp) goto no_match; if (strcasecmp(src, dsp) == 0) goto matched; slen = strlen(src); dlen = strlen(dsp); if (slen <= 0 || dlen <= 0) goto no_match; if (src[slen - 1] == '.') slen--; if (dsp[dlen - 1] == '.') dlen--; if (slen != dlen || strncasecmp(src, dsp, slen) != 0) goto no_match; matched: ret = 1; no_match: kleave(" = %d", ret); return ret; } /* * Preparse the match criterion. */ static int dns_resolver_match_preparse(struct key_match_data *match_data) { match_data->lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE; match_data->cmp = dns_resolver_cmp; return 0; } /* * Describe a DNS key */ static void dns_resolver_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); if (key_is_positive(key)) { int err = PTR_ERR(key->payload.data[dns_key_error]); if (err) seq_printf(m, ": %d", err); else seq_printf(m, ": %u", key->datalen); } } /* * read the DNS data * - the key's semaphore is read-locked */ static long dns_resolver_read(const struct key *key, char *buffer, size_t buflen) { int err = PTR_ERR(key->payload.data[dns_key_error]); if (err) return err; return user_read(key, buffer, buflen); } struct key_type key_type_dns_resolver = { .name = "dns_resolver", .flags = KEY_TYPE_NET_DOMAIN | KEY_TYPE_INSTANT_REAP, .preparse = dns_resolver_preparse, .free_preparse = dns_resolver_free_preparse, .instantiate = generic_key_instantiate, .match_preparse = dns_resolver_match_preparse, .revoke = user_revoke, .destroy = user_destroy, .describe = dns_resolver_describe, .read = dns_resolver_read, }; static int __init init_dns_resolver(void) { struct cred *cred; struct key *keyring; int ret; /* create an override credential set with a special thread keyring in * which DNS requests are cached * * this is used to prevent malicious redirections from being installed * with add_key(). 
*/ cred = prepare_kernel_cred(&init_task); if (!cred) return -ENOMEM; keyring = keyring_alloc(".dns_resolver", GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto failed_put_cred; } ret = register_key_type(&key_type_dns_resolver); if (ret < 0) goto failed_put_key; /* instruct request_key() to use this special keyring as a cache for * the results it looks up */ set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; dns_resolver_cache = cred; kdebug("DNS resolver keyring: %d\n", key_serial(keyring)); return 0; failed_put_key: key_put(keyring); failed_put_cred: put_cred(cred); return ret; } static void __exit exit_dns_resolver(void) { key_revoke(dns_resolver_cache->thread_keyring); unregister_key_type(&key_type_dns_resolver); put_cred(dns_resolver_cache); } module_init(init_dns_resolver) module_exit(exit_dns_resolver) MODULE_LICENSE("GPL");
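The key type above is normally exercised indirectly: an in-kernel user calls request_key("dns_resolver", ...), the /sbin/request-key upcall (the key.dns_resolver helper) does the actual lookup in userspace, and dns_resolver_preparse() stores the result in the key it instantiates. The hedged userspace sketch below drives the same round trip with the keyutils library; it assumes the key.dns_resolver helper is configured on the system, and the hostname is only an example.

#include <keyutils.h>
#include <stdio.h>

int main(void)
{
	key_serial_t key;
	char buf[256];
	long n;

	/* Triggers the upcall; the result is cached as a dns_resolver key. */
	key = request_key("dns_resolver", "www.example.com", NULL,
			  KEY_SPEC_SESSION_KEYRING);
	if (key < 0) {
		perror("request_key");
		return 1;
	}

	/* Reads back the cached payload (comma-separated addresses). */
	n = keyctl_read(key, buf, sizeof(buf) - 1);
	if (n < 0) {
		perror("keyctl_read");
		return 1;
	}
	if (n > (long)(sizeof(buf) - 1))
		n = sizeof(buf) - 1;
	buf[n] = '\0';
	printf("resolved: %s\n", buf);
	return 0;
}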
// SPDX-License-Identifier: GPL-2.0-only /* * LED Triggers Core * * Copyright 2005-2007 Openedhand Ltd. 
* * Author: Richard Purdie <rpurdie@openedhand.com> */ #include <linux/export.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/device.h> #include <linux/timer.h> #include <linux/rwsem.h> #include <linux/leds.h> #include <linux/slab.h> #include <linux/mm.h> #include "leds.h" /* * Nests outside led_cdev->trigger_lock */ static DECLARE_RWSEM(triggers_list_lock); static LIST_HEAD(trigger_list); /* Used by LED Class */ static inline bool trigger_relevant(struct led_classdev *led_cdev, struct led_trigger *trig) { return !trig->trigger_type || trig->trigger_type == led_cdev->trigger_type; } ssize_t led_trigger_write(struct file *filp, struct kobject *kobj, const struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct device *dev = kobj_to_dev(kobj); struct led_classdev *led_cdev = dev_get_drvdata(dev); struct led_trigger *trig; int ret = count; mutex_lock(&led_cdev->led_access); if (led_sysfs_is_disabled(led_cdev)) { ret = -EBUSY; goto unlock; } if (sysfs_streq(buf, "none")) { led_trigger_remove(led_cdev); goto unlock; } if (sysfs_streq(buf, "default")) { led_trigger_set_default(led_cdev); goto unlock; } down_read(&triggers_list_lock); list_for_each_entry(trig, &trigger_list, next_trig) { if (sysfs_streq(buf, trig->name) && trigger_relevant(led_cdev, trig)) { down_write(&led_cdev->trigger_lock); led_trigger_set(led_cdev, trig); up_write(&led_cdev->trigger_lock); up_read(&triggers_list_lock); goto unlock; } } /* we come here only if buf matches no trigger */ ret = -EINVAL; up_read(&triggers_list_lock); unlock: mutex_unlock(&led_cdev->led_access); return ret; } EXPORT_SYMBOL_GPL(led_trigger_write); __printf(3, 4) static int led_trigger_snprintf(char *buf, ssize_t size, const char *fmt, ...) { va_list args; int i; va_start(args, fmt); if (size <= 0) i = vsnprintf(NULL, 0, fmt, args); else i = vscnprintf(buf, size, fmt, args); va_end(args); return i; } static int led_trigger_format(char *buf, size_t size, struct led_classdev *led_cdev) { struct led_trigger *trig; int len = led_trigger_snprintf(buf, size, "%s", led_cdev->trigger ? "none" : "[none]"); if (led_cdev->default_trigger) len += led_trigger_snprintf(buf + len, size - len, " default"); list_for_each_entry(trig, &trigger_list, next_trig) { bool hit; if (!trigger_relevant(led_cdev, trig)) continue; hit = led_cdev->trigger && !strcmp(led_cdev->trigger->name, trig->name); len += led_trigger_snprintf(buf + len, size - len, " %s%s%s", hit ? "[" : "", trig->name, hit ? "]" : ""); } len += led_trigger_snprintf(buf + len, size - len, "\n"); return len; } /* * It was stupid to create 10000 cpu triggers, but we are stuck with it now. * Don't make that mistake again. We work around it here by creating binary * attribute, which is not limited by length. This is _not_ good design, do not * copy it. 
*/ ssize_t led_trigger_read(struct file *filp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t pos, size_t count) { struct device *dev = kobj_to_dev(kobj); struct led_classdev *led_cdev = dev_get_drvdata(dev); void *data; int len; down_read(&triggers_list_lock); down_read(&led_cdev->trigger_lock); len = led_trigger_format(NULL, 0, led_cdev); data = kvmalloc(len + 1, GFP_KERNEL); if (!data) { up_read(&led_cdev->trigger_lock); up_read(&triggers_list_lock); return -ENOMEM; } len = led_trigger_format(data, len + 1, led_cdev); up_read(&led_cdev->trigger_lock); up_read(&triggers_list_lock); len = memory_read_from_buffer(buf, count, &pos, data, len); kvfree(data); return len; } EXPORT_SYMBOL_GPL(led_trigger_read); /* Caller must ensure led_cdev->trigger_lock held */ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig) { char *event = NULL; char *envp[2]; const char *name; int ret; if (!led_cdev->trigger && !trig) return 0; name = trig ? trig->name : "none"; event = kasprintf(GFP_KERNEL, "TRIGGER=%s", name); /* Remove any existing trigger */ if (led_cdev->trigger) { spin_lock(&led_cdev->trigger->leddev_list_lock); list_del_rcu(&led_cdev->trig_list); spin_unlock(&led_cdev->trigger->leddev_list_lock); /* ensure it's no longer visible on the led_cdevs list */ synchronize_rcu(); cancel_work_sync(&led_cdev->set_brightness_work); led_stop_software_blink(led_cdev); device_remove_groups(led_cdev->dev, led_cdev->trigger->groups); if (led_cdev->trigger->deactivate) led_cdev->trigger->deactivate(led_cdev); led_cdev->trigger = NULL; led_cdev->trigger_data = NULL; led_cdev->activated = false; led_cdev->flags &= ~LED_INIT_DEFAULT_TRIGGER; led_set_brightness(led_cdev, LED_OFF); } if (trig) { spin_lock(&trig->leddev_list_lock); list_add_tail_rcu(&led_cdev->trig_list, &trig->led_cdevs); spin_unlock(&trig->leddev_list_lock); led_cdev->trigger = trig; /* * Some activate() calls use led_trigger_event() to initialize * the brightness of the LED for which the trigger is being set. * Ensure the led_cdev is visible on trig->led_cdevs for this. 
*/ synchronize_rcu(); /* * If "set brightness to 0" is pending in workqueue, * we don't want that to be reordered after ->activate() */ flush_work(&led_cdev->set_brightness_work); ret = 0; if (trig->activate) ret = trig->activate(led_cdev); else led_set_brightness(led_cdev, trig->brightness); if (ret) goto err_activate; ret = device_add_groups(led_cdev->dev, trig->groups); if (ret) { dev_err(led_cdev->dev, "Failed to add trigger attributes\n"); goto err_add_groups; } } if (event) { envp[0] = event; envp[1] = NULL; if (kobject_uevent_env(&led_cdev->dev->kobj, KOBJ_CHANGE, envp)) dev_err(led_cdev->dev, "%s: Error sending uevent\n", __func__); kfree(event); } return 0; err_add_groups: if (trig->deactivate) trig->deactivate(led_cdev); err_activate: spin_lock(&led_cdev->trigger->leddev_list_lock); list_del_rcu(&led_cdev->trig_list); spin_unlock(&led_cdev->trigger->leddev_list_lock); synchronize_rcu(); led_cdev->trigger = NULL; led_cdev->trigger_data = NULL; led_set_brightness(led_cdev, LED_OFF); kfree(event); return ret; } EXPORT_SYMBOL_GPL(led_trigger_set); void led_trigger_remove(struct led_classdev *led_cdev) { down_write(&led_cdev->trigger_lock); led_trigger_set(led_cdev, NULL); up_write(&led_cdev->trigger_lock); } EXPORT_SYMBOL_GPL(led_trigger_remove); static bool led_match_default_trigger(struct led_classdev *led_cdev, struct led_trigger *trig) { if (!strcmp(led_cdev->default_trigger, trig->name) && trigger_relevant(led_cdev, trig)) { led_cdev->flags |= LED_INIT_DEFAULT_TRIGGER; led_trigger_set(led_cdev, trig); return true; } return false; } void led_trigger_set_default(struct led_classdev *led_cdev) { struct led_trigger *trig; bool found = false; if (!led_cdev->default_trigger) return; if (!strcmp(led_cdev->default_trigger, "none")) { led_trigger_remove(led_cdev); return; } down_read(&triggers_list_lock); down_write(&led_cdev->trigger_lock); list_for_each_entry(trig, &trigger_list, next_trig) { found = led_match_default_trigger(led_cdev, trig); if (found) break; } up_write(&led_cdev->trigger_lock); up_read(&triggers_list_lock); /* * If default trigger wasn't found, maybe trigger module isn't loaded yet. * Once loaded it will re-probe with all led_cdev's. 
*/ if (!found) request_module_nowait("ledtrig:%s", led_cdev->default_trigger); } EXPORT_SYMBOL_GPL(led_trigger_set_default); /* LED Trigger Interface */ int led_trigger_register(struct led_trigger *trig) { struct led_classdev *led_cdev; struct led_trigger *_trig; spin_lock_init(&trig->leddev_list_lock); INIT_LIST_HEAD(&trig->led_cdevs); down_write(&triggers_list_lock); /* Make sure the trigger's name isn't already in use */ list_for_each_entry(_trig, &trigger_list, next_trig) { if (!strcmp(_trig->name, trig->name) && (trig->trigger_type == _trig->trigger_type || !trig->trigger_type || !_trig->trigger_type)) { up_write(&triggers_list_lock); return -EEXIST; } } /* Add to the list of led triggers */ list_add_tail(&trig->next_trig, &trigger_list); up_write(&triggers_list_lock); /* Register with any LEDs that have this as a default trigger */ down_read(&leds_list_lock); list_for_each_entry(led_cdev, &leds_list, node) { down_write(&led_cdev->trigger_lock); if (!led_cdev->trigger && led_cdev->default_trigger) led_match_default_trigger(led_cdev, trig); up_write(&led_cdev->trigger_lock); } up_read(&leds_list_lock); return 0; } EXPORT_SYMBOL_GPL(led_trigger_register); void led_trigger_unregister(struct led_trigger *trig) { struct led_classdev *led_cdev; if (list_empty_careful(&trig->next_trig)) return; /* Remove from the list of led triggers */ down_write(&triggers_list_lock); list_del_init(&trig->next_trig); up_write(&triggers_list_lock); /* Remove anyone actively using this trigger */ down_read(&leds_list_lock); list_for_each_entry(led_cdev, &leds_list, node) { down_write(&led_cdev->trigger_lock); if (led_cdev->trigger == trig) led_trigger_set(led_cdev, NULL); up_write(&led_cdev->trigger_lock); } up_read(&leds_list_lock); } EXPORT_SYMBOL_GPL(led_trigger_unregister); static void devm_led_trigger_release(struct device *dev, void *res) { led_trigger_unregister(*(struct led_trigger **)res); } int devm_led_trigger_register(struct device *dev, struct led_trigger *trig) { struct led_trigger **dr; int rc; dr = devres_alloc(devm_led_trigger_release, sizeof(*dr), GFP_KERNEL); if (!dr) return -ENOMEM; *dr = trig; rc = led_trigger_register(trig); if (rc) devres_free(dr); else devres_add(dev, dr); return rc; } EXPORT_SYMBOL_GPL(devm_led_trigger_register); /* Simple LED Trigger Interface */ void led_trigger_event(struct led_trigger *trig, enum led_brightness brightness) { struct led_classdev *led_cdev; if (!trig) return; trig->brightness = brightness; rcu_read_lock(); list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) led_set_brightness(led_cdev, brightness); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(led_trigger_event); void led_mc_trigger_event(struct led_trigger *trig, unsigned int *intensity_value, unsigned int num_colors, enum led_brightness brightness) { struct led_classdev *led_cdev; if (!trig) return; rcu_read_lock(); list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) { if (!(led_cdev->flags & LED_MULTI_COLOR)) continue; led_mc_set_brightness(led_cdev, intensity_value, num_colors, brightness); } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(led_mc_trigger_event); static void led_trigger_blink_setup(struct led_trigger *trig, unsigned long delay_on, unsigned long delay_off, int oneshot, int invert) { struct led_classdev *led_cdev; if (!trig) return; rcu_read_lock(); list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) { if (oneshot) led_blink_set_oneshot(led_cdev, &delay_on, &delay_off, invert); else led_blink_set_nosleep(led_cdev, delay_on, delay_off); } rcu_read_unlock(); } void 
led_trigger_blink(struct led_trigger *trig, unsigned long delay_on, unsigned long delay_off) { led_trigger_blink_setup(trig, delay_on, delay_off, 0, 0); } EXPORT_SYMBOL_GPL(led_trigger_blink); void led_trigger_blink_oneshot(struct led_trigger *trig, unsigned long delay_on, unsigned long delay_off, int invert) { led_trigger_blink_setup(trig, delay_on, delay_off, 1, invert); } EXPORT_SYMBOL_GPL(led_trigger_blink_oneshot); void led_trigger_register_simple(const char *name, struct led_trigger **tp) { struct led_trigger *trig; int err; trig = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); if (trig) { trig->name = name; err = led_trigger_register(trig); if (err < 0) { kfree(trig); trig = NULL; pr_warn("LED trigger %s failed to register (%d)\n", name, err); } } else { pr_warn("LED trigger %s failed to register (no memory)\n", name); } *tp = trig; } EXPORT_SYMBOL_GPL(led_trigger_register_simple); void led_trigger_unregister_simple(struct led_trigger *trig) { if (trig) led_trigger_unregister(trig); kfree(trig); } EXPORT_SYMBOL_GPL(led_trigger_unregister_simple);
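/*
 * Editorial sketch (not part of led-triggers.c): how a driver might use the
 * simple trigger interface defined above. The trigger name "example-event"
 * and the example_* identifiers are illustrative only; only
 * led_trigger_register_simple(), led_trigger_event() and
 * led_trigger_unregister_simple() from this file (declared via
 * <linux/leds.h>) are assumed.
 */
#include <linux/leds.h>

static struct led_trigger *example_trig;

static int example_source_init(void)
{
	/* On failure *example_trig is left NULL, which the helpers tolerate. */
	led_trigger_register_simple("example-event", &example_trig);
	return 0;
}

static void example_source_notify(bool active)
{
	/* Drives every LED that was bound to this trigger via sysfs. */
	led_trigger_event(example_trig, active ? LED_FULL : LED_OFF);
}

static void example_source_exit(void)
{
	led_trigger_unregister_simple(example_trig);
}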
// SPDX-License-Identifier: GPL-2.0-only /* * Transparent proxy support for Linux/iptables * * Copyright (c) 2006-2010 BalaBit IT Ltd. * Author: Balazs Scheidler, Krisztian Kovacs */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <net/checksum.h> #include <net/udp.h> #include <net/tcp.h> #include <net/inet_sock.h> #include <net/inet_hashtables.h> #include <linux/inetdevice.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <net/netfilter/ipv4/nf_defrag_ipv4.h> #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) #define XT_TPROXY_HAVE_IPV6 1 #include <net/if_inet6.h> #include <net/addrconf.h> #include <net/inet6_hashtables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif #include <net/netfilter/nf_tproxy.h> #include <linux/netfilter/xt_TPROXY.h> static unsigned int tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, u_int32_t mark_mask, u_int32_t mark_value) { const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp; struct sock *sk; hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); if (hp == NULL) return NF_DROP; /* check if there's an ongoing connection on the packet * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol, iph->saddr, iph->daddr, hp->source, hp->dest, skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED); laddr = nf_tproxy_laddr4(skb, laddr, iph->daddr); if (!lport) lport = hp->dest; /* UDP has no TCP_TIME_WAIT state, so we never enter here */ if (sk && sk->sk_state == TCP_TIME_WAIT) /* reopening a TIME_WAIT connection needs special handling */ sk = nf_tproxy_handle_time_wait4(net, skb, laddr, lport, sk); else if (!sk) /* no, there's no established connection, check if * there's a listener on the redirected addr/port */ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol, iph->saddr, laddr, hp->source, lport, skb->dev, NF_TPROXY_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ if (sk && nf_tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~mark_mask) ^ mark_value; nf_tproxy_assign_sock(skb, sk); return NF_ACCEPT; } return NF_DROP; } static unsigned int tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi =
par->targinfo; return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); } static unsigned int tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); } #ifdef XT_TPROXY_HAVE_IPV6 static unsigned int tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; struct udphdr _hdr, *hp; struct sock *sk; const struct in6_addr *laddr; __be16 lport; int thoff = 0; int tproto; tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); if (tproto < 0) return NF_DROP; hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); if (!hp) return NF_DROP; /* check if there's an ongoing connection on the packet * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED); laddr = nf_tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr); lport = tgi->lport ? tgi->lport : hp->dest; /* UDP has no TCP_TIME_WAIT state, so we never enter here */ if (sk && sk->sk_state == TCP_TIME_WAIT) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; /* reopening a TIME_WAIT connection needs special handling */ sk = nf_tproxy_handle_time_wait6(skb, tproto, thoff, xt_net(par), &tgi->laddr.in6, tgi->lport, sk); } else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, tproto, &iph->saddr, laddr, hp->source, lport, xt_in(par), NF_TPROXY_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ if (sk && nf_tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; nf_tproxy_assign_sock(skb, sk); return NF_ACCEPT; } return NF_DROP; } static int tproxy_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_ip6 *i = par->entryinfo; int err; err = nf_defrag_ipv6_enable(par->net); if (err) return err; if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && !(i->invflags & IP6T_INV_PROTO)) return 0; pr_info_ratelimited("Can be used only with -p tcp or -p udp\n"); return -EINVAL; } static void tproxy_tg6_destroy(const struct xt_tgdtor_param *par) { nf_defrag_ipv6_disable(par->net); } #endif static int tproxy_tg4_check(const struct xt_tgchk_param *par) { const struct ipt_ip *i = par->entryinfo; int err; err = nf_defrag_ipv4_enable(par->net); if (err) return err; if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && !(i->invflags & IPT_INV_PROTO)) return 0; pr_info_ratelimited("Can be used only with -p tcp or -p udp\n"); return -EINVAL; } static void tproxy_tg4_destroy(const struct xt_tgdtor_param *par) { nf_defrag_ipv4_disable(par->net); } static struct xt_target tproxy_tg_reg[] __read_mostly = { { .name = "TPROXY", .family = NFPROTO_IPV4, .table = "mangle", .target = tproxy_tg4_v0, .revision = 0, .targetsize = sizeof(struct xt_tproxy_target_info), .checkentry = tproxy_tg4_check, .destroy = tproxy_tg4_destroy, .hooks = 1 << NF_INET_PRE_ROUTING, .me = THIS_MODULE, }, { .name = "TPROXY", .family = NFPROTO_IPV4, .table = "mangle", 
.target = tproxy_tg4_v1, .revision = 1, .targetsize = sizeof(struct xt_tproxy_target_info_v1), .checkentry = tproxy_tg4_check, .destroy = tproxy_tg4_destroy, .hooks = 1 << NF_INET_PRE_ROUTING, .me = THIS_MODULE, }, #ifdef XT_TPROXY_HAVE_IPV6 { .name = "TPROXY", .family = NFPROTO_IPV6, .table = "mangle", .target = tproxy_tg6_v1, .revision = 1, .targetsize = sizeof(struct xt_tproxy_target_info_v1), .checkentry = tproxy_tg6_check, .destroy = tproxy_tg6_destroy, .hooks = 1 << NF_INET_PRE_ROUTING, .me = THIS_MODULE, }, #endif }; static int __init tproxy_tg_init(void) { return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg)); } static void __exit tproxy_tg_exit(void) { xt_unregister_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg)); } module_init(tproxy_tg_init); module_exit(tproxy_tg_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs"); MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module."); MODULE_ALIAS("ipt_TPROXY"); MODULE_ALIAS("ip6t_TPROXY");
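/*
 * Editorial sketch (not part of this module): the userspace side of a TPROXY
 * rule. The target above only accepts packets whose socket passes
 * nf_tproxy_sk_is_transparent(), i.e. a socket with IP_TRANSPARENT set, so a
 * transparent proxy typically creates its listener like this. A minimal
 * sketch assuming IP_TRANSPARENT/SOL_IP from <netinet/in.h>; port 8080 is an
 * arbitrary example and must match the rule's redirection port.
 */
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int main(void)
{
	int one = 1;
	struct sockaddr_in addr;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return 1;

	/* Allow binding to and accepting connections for non-local addresses;
	 * needs CAP_NET_ADMIN. */
	if (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) < 0) {
		perror("IP_TRANSPARENT");
		return 1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	addr.sin_port = htons(8080);

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 || listen(fd, 16) < 0) {
		perror("bind/listen");
		return 1;
	}

	/* accept() now also delivers connections originally addressed to
	 * foreign destinations that the TPROXY rule redirected here. */
	close(fd);
	return 0;
}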
// SPDX-License-Identifier: GPL-2.0 /* * This module exports the functions: * * 'int set_selection_user(struct tiocl_selection __user *, * struct tty_struct *)' * 'int set_selection_kernel(struct tiocl_selection *, struct tty_struct *)' * 'void clear_selection(void)' * 'int paste_selection(struct tty_struct *)' * 'int sel_loadlut(u32 __user *)' * * Now that /dev/vcs exists, most of this can disappear again. */ #include <linux/module.h> #include <linux/tty.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/uaccess.h> #include <linux/kbd_kern.h> #include <linux/vt_kern.h> #include <linux/consolemap.h> #include <linux/selection.h> #include <linux/tiocl.h> #include <linux/console.h> #include <linux/tty_flip.h> #include <linux/sched/signal.h> /* Don't take this from <ctype.h>: 011-015 on the screen aren't spaces */ #define is_space_on_vt(c) ((c) == ' ') /* FIXME: all this needs locking */ static struct vc_selection { struct mutex lock; struct vc_data *cons; /* must not be deallocated */ char *buffer; unsigned int buf_len; volatile int start; /* cleared by clear_selection */ int end; } vc_sel = { .lock = __MUTEX_INITIALIZER(vc_sel.lock), .start = -1, }; /* clear_selection, highlight and highlight_pointer can be called from interrupt (via scrollback/front) */ /* set reverse video on characters s-e of console with selection.
*/ static inline void highlight(const int s, const int e) { invert_screen(vc_sel.cons, s, e-s+2, true); } /* use complementary color to show the pointer */ static inline void highlight_pointer(const int where) { complement_pos(vc_sel.cons, where); } static u32 sel_pos(int n, bool unicode) { if (unicode) return screen_glyph_unicode(vc_sel.cons, n / 2); return inverse_translate(vc_sel.cons, screen_glyph(vc_sel.cons, n), false); } /** * clear_selection - remove current selection * * Remove the current selection highlight, if any from the console holding the * selection. * * Locking: The caller must hold the console lock. */ void clear_selection(void) { highlight_pointer(-1); /* hide the pointer */ if (vc_sel.start != -1) { highlight(vc_sel.start, vc_sel.end); vc_sel.start = -1; } } EXPORT_SYMBOL_GPL(clear_selection); bool vc_is_sel(const struct vc_data *vc) { return vc == vc_sel.cons; } /* * User settable table: what characters are to be considered alphabetic? * 128 bits. Locked by the console lock. */ static u32 inwordLut[]={ 0x00000000, /* control chars */ 0x03FFE000, /* digits and "-./" */ 0x87FFFFFE, /* uppercase and '_' */ 0x07FFFFFE, /* lowercase */ }; static inline int inword(const u32 c) { return c > 0x7f || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1); } /** * sel_loadlut() - load the LUT table * @lut: user table * * Load the LUT table from user space. Make a temporary copy so a partial * update doesn't make a mess. * * Locking: The console lock is acquired. */ int sel_loadlut(u32 __user *lut) { u32 tmplut[ARRAY_SIZE(inwordLut)]; if (copy_from_user(tmplut, lut, sizeof(inwordLut))) return -EFAULT; console_lock(); memcpy(inwordLut, tmplut, sizeof(inwordLut)); console_unlock(); return 0; } /* does screen address p correspond to character at LH/RH edge of screen? */ static inline int atedge(const int p, int size_row) { return (!(p % size_row) || !((p + 2) % size_row)); } /* stores the char in UTF8 and returns the number of bytes used (1-4) */ static int store_utf8(u32 c, char *p) { if (c < 0x80) { /* 0******* */ p[0] = c; return 1; } else if (c < 0x800) { /* 110***** 10****** */ p[0] = 0xc0 | (c >> 6); p[1] = 0x80 | (c & 0x3f); return 2; } else if (c < 0x10000) { /* 1110**** 10****** 10****** */ p[0] = 0xe0 | (c >> 12); p[1] = 0x80 | ((c >> 6) & 0x3f); p[2] = 0x80 | (c & 0x3f); return 3; } else if (c < 0x110000) { /* 11110*** 10****** 10****** 10****** */ p[0] = 0xf0 | (c >> 18); p[1] = 0x80 | ((c >> 12) & 0x3f); p[2] = 0x80 | ((c >> 6) & 0x3f); p[3] = 0x80 | (c & 0x3f); return 4; } else { /* outside Unicode, replace with U+FFFD */ p[0] = 0xef; p[1] = 0xbf; p[2] = 0xbd; return 3; } } /** * set_selection_user - set the current selection. * @sel: user selection info * @tty: the console tty * * Invoked by the ioctl handle for the vt layer. * * Locking: The entire selection process is managed under the console_lock. * It's a lot under the lock but its hardly a performance path. */ int set_selection_user(const struct tiocl_selection __user *sel, struct tty_struct *tty) { struct tiocl_selection v; if (copy_from_user(&v, sel, sizeof(*sel))) return -EFAULT; /* * TIOCL_SELCLEAR and TIOCL_SELPOINTER are OK to use without * CAP_SYS_ADMIN as they do not modify the selection. */ switch (v.sel_mode) { case TIOCL_SELCLEAR: case TIOCL_SELPOINTER: break; default: if (!capable(CAP_SYS_ADMIN)) return -EPERM; } return set_selection_kernel(&v, tty); } static int vc_selection_store_chars(struct vc_data *vc, bool unicode) { char *bp, *obp; unsigned int i; /* Allocate a new buffer before freeing the old one ... 
*/ /* chars can take up to 4 bytes with unicode */ bp = kmalloc_array((vc_sel.end - vc_sel.start) / 2 + 1, unicode ? 4 : 1, GFP_KERNEL | __GFP_NOWARN); if (!bp) { printk(KERN_WARNING "selection: kmalloc() failed\n"); clear_selection(); return -ENOMEM; } kfree(vc_sel.buffer); vc_sel.buffer = bp; obp = bp; for (i = vc_sel.start; i <= vc_sel.end; i += 2) { u32 c = sel_pos(i, unicode); if (unicode) bp += store_utf8(c, bp); else *bp++ = c; if (!is_space_on_vt(c)) obp = bp; if (!((i + 2) % vc->vc_size_row)) { /* strip trailing blanks from line and add newline, unless non-space at end of line. */ if (obp != bp) { bp = obp; *bp++ = '\r'; } obp = bp; } } vc_sel.buf_len = bp - vc_sel.buffer; return 0; } static int vc_do_selection(struct vc_data *vc, unsigned short mode, int ps, int pe) { int new_sel_start, new_sel_end, spc; bool unicode = vt_do_kdgkbmode(fg_console) == K_UNICODE; switch (mode) { case TIOCL_SELCHAR: /* character-by-character selection */ new_sel_start = ps; new_sel_end = pe; break; case TIOCL_SELWORD: /* word-by-word selection */ spc = is_space_on_vt(sel_pos(ps, unicode)); for (new_sel_start = ps; ; ps -= 2) { if ((spc && !is_space_on_vt(sel_pos(ps, unicode))) || (!spc && !inword(sel_pos(ps, unicode)))) break; new_sel_start = ps; if (!(ps % vc->vc_size_row)) break; } spc = is_space_on_vt(sel_pos(pe, unicode)); for (new_sel_end = pe; ; pe += 2) { if ((spc && !is_space_on_vt(sel_pos(pe, unicode))) || (!spc && !inword(sel_pos(pe, unicode)))) break; new_sel_end = pe; if (!((pe + 2) % vc->vc_size_row)) break; } break; case TIOCL_SELLINE: /* line-by-line selection */ new_sel_start = rounddown(ps, vc->vc_size_row); new_sel_end = rounddown(pe, vc->vc_size_row) + vc->vc_size_row - 2; break; case TIOCL_SELPOINTER: highlight_pointer(pe); return 0; default: return -EINVAL; } /* remove the pointer */ highlight_pointer(-1); /* select to end of line if on trailing space */ if (new_sel_end > new_sel_start && !atedge(new_sel_end, vc->vc_size_row) && is_space_on_vt(sel_pos(new_sel_end, unicode))) { for (pe = new_sel_end + 2; ; pe += 2) if (!is_space_on_vt(sel_pos(pe, unicode)) || atedge(pe, vc->vc_size_row)) break; if (is_space_on_vt(sel_pos(pe, unicode))) new_sel_end = pe; } if (vc_sel.start == -1) /* no current selection */ highlight(new_sel_start, new_sel_end); else if (new_sel_start == vc_sel.start) { if (new_sel_end == vc_sel.end) /* no action required */ return 0; else if (new_sel_end > vc_sel.end) /* extend to right */ highlight(vc_sel.end + 2, new_sel_end); else /* contract from right */ highlight(new_sel_end + 2, vc_sel.end); } else if (new_sel_end == vc_sel.end) { if (new_sel_start < vc_sel.start) /* extend to left */ highlight(new_sel_start, vc_sel.start - 2); else /* contract from left */ highlight(vc_sel.start, new_sel_start - 2); } else /* some other case; start selection from scratch */ { clear_selection(); highlight(new_sel_start, new_sel_end); } vc_sel.start = new_sel_start; vc_sel.end = new_sel_end; return vc_selection_store_chars(vc, unicode); } static int vc_selection(struct vc_data *vc, struct tiocl_selection *v, struct tty_struct *tty) { int ps, pe; poke_blanked_console(); if (v->sel_mode == TIOCL_SELCLEAR) { /* useful for screendump without selection highlights */ clear_selection(); return 0; } v->xs = min_t(u16, v->xs - 1, vc->vc_cols - 1); v->ys = min_t(u16, v->ys - 1, vc->vc_rows - 1); v->xe = min_t(u16, v->xe - 1, vc->vc_cols - 1); v->ye = min_t(u16, v->ye - 1, vc->vc_rows - 1); if (mouse_reporting() && (v->sel_mode & TIOCL_SELMOUSEREPORT)) { mouse_report(tty, v->sel_mode & 
TIOCL_SELBUTTONMASK, v->xs, v->ys); return 0; } ps = v->ys * vc->vc_size_row + (v->xs << 1); pe = v->ye * vc->vc_size_row + (v->xe << 1); if (ps > pe) /* make vc_sel.start <= vc_sel.end */ swap(ps, pe); if (vc_sel.cons != vc) { clear_selection(); vc_sel.cons = vc; } return vc_do_selection(vc, v->sel_mode, ps, pe); } int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) { int ret; mutex_lock(&vc_sel.lock); console_lock(); ret = vc_selection(vc_cons[fg_console].d, v, tty); console_unlock(); mutex_unlock(&vc_sel.lock); return ret; } EXPORT_SYMBOL_GPL(set_selection_kernel); /* Insert the contents of the selection buffer into the * queue of the tty associated with the current console. * Invoked by ioctl(). * * Locking: called without locks. Calls the ldisc wrongly with * unsafe methods, */ int paste_selection(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; int pasted = 0; size_t count; struct tty_ldisc *ld; DECLARE_WAITQUEUE(wait, current); int ret = 0; bool bp = vc->vc_bracketed_paste; static const char bracketed_paste_start[] = "\033[200~"; static const char bracketed_paste_end[] = "\033[201~"; const char *bps = bp ? bracketed_paste_start : NULL; const char *bpe = bp ? bracketed_paste_end : NULL; console_lock(); poke_blanked_console(); console_unlock(); ld = tty_ldisc_ref_wait(tty); if (!ld) return -EIO; /* ldisc was hung up */ tty_buffer_lock_exclusive(&vc->port); add_wait_queue(&vc->paste_wait, &wait); mutex_lock(&vc_sel.lock); while (vc_sel.buffer && (vc_sel.buf_len > pasted || bpe)) { set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) { ret = -EINTR; break; } if (tty_throttled(tty)) { mutex_unlock(&vc_sel.lock); schedule(); mutex_lock(&vc_sel.lock); continue; } __set_current_state(TASK_RUNNING); if (bps) { bps += tty_ldisc_receive_buf(ld, bps, NULL, strlen(bps)); if (*bps != '\0') continue; bps = NULL; } count = vc_sel.buf_len - pasted; if (count) { pasted += tty_ldisc_receive_buf(ld, vc_sel.buffer + pasted, NULL, count); if (vc_sel.buf_len > pasted) continue; } if (bpe) { bpe += tty_ldisc_receive_buf(ld, bpe, NULL, strlen(bpe)); if (*bpe == '\0') bpe = NULL; } } mutex_unlock(&vc_sel.lock); remove_wait_queue(&vc->paste_wait, &wait); __set_current_state(TASK_RUNNING); tty_buffer_unlock_exclusive(&vc->port); tty_ldisc_deref(ld); return ret; } EXPORT_SYMBOL_GPL(paste_selection);
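/*
 * Editorial sketch (not part of this file): exercising the code above from
 * userspace through the TIOCLINUX ioctl, assuming the TIOCL_SETSEL and
 * TIOCL_PASTESEL subcodes from <linux/tiocl.h> and TIOCLINUX from
 * <sys/ioctl.h>. "/dev/tty1" is an arbitrary example console; setting a
 * selection requires CAP_SYS_ADMIN, as enforced by set_selection_user().
 */
#include <string.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/tiocl.h>

int main(void)
{
	struct {
		char subcode;
		struct tiocl_selection sel;
	} __attribute__((packed)) req;
	int fd = open("/dev/tty1", O_RDWR);

	if (fd < 0)
		return 1;

	/* Select the first ten characters of the top line; coordinates are
	 * 1-based, as vc_selection() subtracts one from each of them. */
	memset(&req, 0, sizeof(req));
	req.subcode = TIOCL_SETSEL;
	req.sel.xs = 1;
	req.sel.ys = 1;
	req.sel.xe = 10;
	req.sel.ye = 1;
	req.sel.sel_mode = TIOCL_SELCHAR;
	if (ioctl(fd, TIOCLINUX, &req) < 0)
		perror("TIOCL_SETSEL");

	/* Feed the stored selection back into the console input queue. */
	req.subcode = TIOCL_PASTESEL;
	if (ioctl(fd, TIOCLINUX, &req) < 0)
		perror("TIOCL_PASTESEL");

	close(fd);
	return 0;
}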
// SPDX-License-Identifier: GPL-2.0 /* Driver for Microtek Scanmaker X6 USB scanner, and possibly others. * * (C) Copyright 2000 John Fremlin <vii@penguinpowered.com> * (C) Copyright 2000 Oliver Neukum <Oliver.Neukum@lrz.uni-muenchen.de> * * Parts shamelessly stolen from usb-storage and copyright by their * authors. Thanks to Matt Dharm for giving us permission!
* * This driver implements a SCSI host controller driver and a USB * device driver. To avoid confusion, all the USB related stuff is * prefixed by mts_usb_ and all the SCSI stuff by mts_scsi_. * * Microtek (www.microtek.com) did not release the specifications for * their USB protocol to us, so we had to reverse engineer them. We * don't know for which models they are valid. * * The X6 USB has three bulk endpoints, one output (0x1) down which * commands and outgoing data are sent, and two input: 0x82 from which * normal data is read from the scanner (in packets of maximum 32 * bytes) and from which the status byte is read, and 0x83 from which * the results of a scan (or preview) are read in up to 64 * 1024 byte * chunks by the Windows driver. We don't know how much it is possible * to read at a time from 0x83. * * It seems possible to read (with URB transfers) everything from 0x82 * in one go, without bothering to read in 32 byte chunks. * * There seems to be an optimisation of a further READ implicit if * you simply read from 0x83. * * Guessed protocol: * * Send raw SCSI command to EP 0x1 * * If there is data to receive: * If the command was READ datatype=image: * Read a lot of data from EP 0x83 * Else: * Read data from EP 0x82 * Else: * If there is data to transmit: * Write it to EP 0x1 * * Read status byte from EP 0x82 * * References: * * The SCSI command set for the scanner is available from * ftp://ftp.microtek.com/microtek/devpack/ * * Microtek NV sent us a more up to date version of the document. If * you want it, just send mail. * * Status: * * Untested with multiple scanners. * Untested on SMP. * Untested on a bigendian machine. * * History: * * 20000417 starting history * 20000417 fixed load oops * 20000417 fixed unload oops * 20000419 fixed READ IMAGE detection * 20000424 started conversion to use URBs * 20000502 handled short transfers as errors * 20000513 rename and organisation of functions (john) * 20000513 added IDs for all products supported by Windows driver (john) * 20000514 Rewrote mts_scsi_queuecommand to use URBs (john) * 20000514 Version 0.0.8j * 20000514 Fix reporting of non-existent devices to SCSI layer (john) * 20000514 Added MTS_DEBUG_INT (john) * 20000514 Changed "usb-microtek" to "microtek" for consistency (john) * 20000514 Stupid bug fixes (john) * 20000514 Version 0.0.9j * 20000515 Put transfer context and URB in mts_desc (john) * 20000515 Added prelim turn off debugging support (john) * 20000515 Version 0.0.10j * 20000515 Fixed up URB allocation (clear URB on alloc) (john) * 20000515 Version 0.0.11j * 20000516 Removed unnecessary spinlock in mts_transfer_context (john) * 20000516 Removed unnecessary up on instance lock in mts_remove_nolock (john) * 20000516 Implemented (badly) scsi_abort (john) * 20000516 Version 0.0.12j * 20000517 Hopefully removed mts_remove_nolock quasideadlock (john) * 20000517 Added mts_debug_dump to print ll USB info (john) * 20000518 Tweaks and documentation updates (john) * 20000518 Version 0.0.13j * 20000518 Cleaned up abort handling (john) * 20000523 Removed scsi_command and various scsi_..._resets (john) * 20000523 Added unlink URB on scsi_abort, now OHCI supports it (john) * 20000523 Fixed last tiresome compile warning (john) * 20000523 Version 0.0.14j (though version 0.1 has come out?) 
* 20000602 Added primitive reset * 20000602 Version 0.2.0 * 20000603 various cosmetic changes * 20000603 Version 0.2.1 * 20000620 minor cosmetic changes * 20000620 Version 0.2.2 * 20000822 Hopefully fixed deadlock in mts_remove_nolock() * 20000822 Fixed minor race in mts_transfer_cleanup() * 20000822 Fixed deadlock on submission error in queuecommand * 20000822 Version 0.2.3 * 20000913 Reduced module size if debugging is off * 20000913 Version 0.2.4 * 20010210 New abort logic * 20010210 Version 0.3.0 * 20010217 Merged scatter/gather * 20010218 Version 0.4.0 * 20010218 Cosmetic fixes * 20010218 Version 0.4.1 * 20010306 Abort while using scatter/gather * 20010306 Version 0.4.2 * 20010311 Remove all timeouts and tidy up generally (john) * 20010320 check return value of scsi_register() * 20010320 Version 0.4.3 * 20010408 Identify version on module load. * 20011003 Fix multiple requests */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/errno.h> #include <linux/random.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/usb.h> #include <linux/proc_fs.h> #include <linux/atomic.h> #include <linux/blkdev.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_host.h> #include <scsi/scsi_tcq.h> #include "microtek.h" #define DRIVER_AUTHOR "John Fremlin <vii@penguinpowered.com>, Oliver Neukum <Oliver.Neukum@lrz.uni-muenchen.de>" #define DRIVER_DESC "Microtek Scanmaker X6 USB scanner driver" /* Should we do debugging? */ //#define MTS_DO_DEBUG /* USB layer driver interface */ static int mts_usb_probe(struct usb_interface *intf, const struct usb_device_id *id); static void mts_usb_disconnect(struct usb_interface *intf); static const struct usb_device_id mts_usb_ids[]; static struct usb_driver mts_usb_driver = { .name = "microtekX6", .probe = mts_usb_probe, .disconnect = mts_usb_disconnect, .id_table = mts_usb_ids, }; /* Internal driver stuff */ #define MTS_VERSION "0.4.3" #define MTS_NAME "microtek usb (rev " MTS_VERSION "): " #define MTS_WARNING(x...) \ printk( KERN_WARNING MTS_NAME x ) #define MTS_ERROR(x...) \ printk( KERN_ERR MTS_NAME x ) #define MTS_INT_ERROR(x...) \ MTS_ERROR(x) #define MTS_MESSAGE(x...) \ printk( KERN_INFO MTS_NAME x ) #if defined MTS_DO_DEBUG #define MTS_DEBUG(x...) \ printk( KERN_DEBUG MTS_NAME x ) #define MTS_DEBUG_GOT_HERE() \ MTS_DEBUG("got to %s:%d (%s)\n", __FILE__, (int)__LINE__, __func__ ) #define MTS_DEBUG_INT() \ do { MTS_DEBUG_GOT_HERE(); \ MTS_DEBUG("transfer = 0x%x context = 0x%x\n",(int)transfer,(int)context ); \ MTS_DEBUG("status = 0x%x data-length = 0x%x sent = 0x%x\n",transfer->status,(int)context->data_length, (int)transfer->actual_length ); \ mts_debug_dump(context->instance);\ } while(0) #else #define MTS_NUL_STATEMENT do { } while(0) #define MTS_DEBUG(x...) 
MTS_NUL_STATEMENT #define MTS_DEBUG_GOT_HERE() MTS_NUL_STATEMENT #define MTS_DEBUG_INT() MTS_NUL_STATEMENT #endif #define MTS_INT_INIT()\ struct mts_transfer_context* context = (struct mts_transfer_context*)transfer->context; \ MTS_DEBUG_INT();\ #ifdef MTS_DO_DEBUG static inline void mts_debug_dump(struct mts_desc* desc) { MTS_DEBUG("desc at 0x%x: toggle = %02x%02x\n", (int)desc, (int)desc->usb_dev->toggle[1],(int)desc->usb_dev->toggle[0] ); MTS_DEBUG("ep_out=%x ep_response=%x ep_image=%x\n", usb_sndbulkpipe(desc->usb_dev,desc->ep_out), usb_rcvbulkpipe(desc->usb_dev,desc->ep_response), usb_rcvbulkpipe(desc->usb_dev,desc->ep_image) ); } static inline void mts_show_command(struct scsi_cmnd *srb) { char *what = NULL; switch (srb->cmnd[0]) { case TEST_UNIT_READY: what = "TEST_UNIT_READY"; break; case REZERO_UNIT: what = "REZERO_UNIT"; break; case REQUEST_SENSE: what = "REQUEST_SENSE"; break; case FORMAT_UNIT: what = "FORMAT_UNIT"; break; case READ_BLOCK_LIMITS: what = "READ_BLOCK_LIMITS"; break; case REASSIGN_BLOCKS: what = "REASSIGN_BLOCKS"; break; case READ_6: what = "READ_6"; break; case WRITE_6: what = "WRITE_6"; break; case SEEK_6: what = "SEEK_6"; break; case READ_REVERSE: what = "READ_REVERSE"; break; case WRITE_FILEMARKS: what = "WRITE_FILEMARKS"; break; case SPACE: what = "SPACE"; break; case INQUIRY: what = "INQUIRY"; break; case RECOVER_BUFFERED_DATA: what = "RECOVER_BUFFERED_DATA"; break; case MODE_SELECT: what = "MODE_SELECT"; break; case RESERVE: what = "RESERVE"; break; case RELEASE: what = "RELEASE"; break; case COPY: what = "COPY"; break; case ERASE: what = "ERASE"; break; case MODE_SENSE: what = "MODE_SENSE"; break; case START_STOP: what = "START_STOP"; break; case RECEIVE_DIAGNOSTIC: what = "RECEIVE_DIAGNOSTIC"; break; case SEND_DIAGNOSTIC: what = "SEND_DIAGNOSTIC"; break; case ALLOW_MEDIUM_REMOVAL: what = "ALLOW_MEDIUM_REMOVAL"; break; case SET_WINDOW: what = "SET_WINDOW"; break; case READ_CAPACITY: what = "READ_CAPACITY"; break; case READ_10: what = "READ_10"; break; case WRITE_10: what = "WRITE_10"; break; case SEEK_10: what = "SEEK_10"; break; case WRITE_VERIFY: what = "WRITE_VERIFY"; break; case VERIFY: what = "VERIFY"; break; case SEARCH_HIGH: what = "SEARCH_HIGH"; break; case SEARCH_EQUAL: what = "SEARCH_EQUAL"; break; case SEARCH_LOW: what = "SEARCH_LOW"; break; case SET_LIMITS: what = "SET_LIMITS"; break; case READ_POSITION: what = "READ_POSITION"; break; case SYNCHRONIZE_CACHE: what = "SYNCHRONIZE_CACHE"; break; case LOCK_UNLOCK_CACHE: what = "LOCK_UNLOCK_CACHE"; break; case READ_DEFECT_DATA: what = "READ_DEFECT_DATA"; break; case MEDIUM_SCAN: what = "MEDIUM_SCAN"; break; case COMPARE: what = "COMPARE"; break; case COPY_VERIFY: what = "COPY_VERIFY"; break; case WRITE_BUFFER: what = "WRITE_BUFFER"; break; case READ_BUFFER: what = "READ_BUFFER"; break; case UPDATE_BLOCK: what = "UPDATE_BLOCK"; break; case READ_LONG: what = "READ_LONG"; break; case WRITE_LONG: what = "WRITE_LONG"; break; case CHANGE_DEFINITION: what = "CHANGE_DEFINITION"; break; case WRITE_SAME: what = "WRITE_SAME"; break; case READ_TOC: what = "READ_TOC"; break; case LOG_SELECT: what = "LOG_SELECT"; break; case LOG_SENSE: what = "LOG_SENSE"; break; case MODE_SELECT_10: what = "MODE_SELECT_10"; break; case MODE_SENSE_10: what = "MODE_SENSE_10"; break; case MOVE_MEDIUM: what = "MOVE_MEDIUM"; break; case READ_12: what = "READ_12"; break; case WRITE_12: what = "WRITE_12"; break; case WRITE_VERIFY_12: what = "WRITE_VERIFY_12"; break; case SEARCH_HIGH_12: what = "SEARCH_HIGH_12"; break; case 
SEARCH_EQUAL_12: what = "SEARCH_EQUAL_12"; break; case SEARCH_LOW_12: what = "SEARCH_LOW_12"; break; case READ_ELEMENT_STATUS: what = "READ_ELEMENT_STATUS"; break; case SEND_VOLUME_TAG: what = "SEND_VOLUME_TAG"; break; case WRITE_LONG_2: what = "WRITE_LONG_2"; break; default: MTS_DEBUG("can't decode command\n"); goto out; break; } MTS_DEBUG( "Command %s (%d bytes)\n", what, srb->cmd_len); out: MTS_DEBUG( " %10ph\n", srb->cmnd); } #else static inline void mts_show_command(struct scsi_cmnd * dummy) { } static inline void mts_debug_dump(struct mts_desc* dummy) { } #endif static inline void mts_urb_abort(struct mts_desc* desc) { MTS_DEBUG_GOT_HERE(); mts_debug_dump(desc); usb_kill_urb( desc->urb ); } static int mts_sdev_init (struct scsi_device *s) { s->inquiry_len = 0x24; return 0; } static int mts_scsi_abort(struct scsi_cmnd *srb) { struct mts_desc* desc = (struct mts_desc*)(srb->device->host->hostdata[0]); MTS_DEBUG_GOT_HERE(); mts_urb_abort(desc); return FAILED; } static int mts_scsi_host_reset(struct scsi_cmnd *srb) { struct mts_desc* desc = (struct mts_desc*)(srb->device->host->hostdata[0]); int result; MTS_DEBUG_GOT_HERE(); mts_debug_dump(desc); result = usb_lock_device_for_reset(desc->usb_dev, desc->usb_intf); if (result == 0) { result = usb_reset_device(desc->usb_dev); usb_unlock_device(desc->usb_dev); } return result ? FAILED : SUCCESS; } static int mts_scsi_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *srb); static void mts_transfer_cleanup( struct urb *transfer ); static void mts_do_sg(struct urb * transfer); static inline void mts_int_submit_urb (struct urb* transfer, int pipe, void* data, unsigned length, usb_complete_t callback ) /* Interrupt context! */ /* Holding transfer->context->lock! */ { int res; MTS_INT_INIT(); usb_fill_bulk_urb(transfer, context->instance->usb_dev, pipe, data, length, callback, context ); res = usb_submit_urb( transfer, GFP_ATOMIC ); if ( unlikely(res) ) { MTS_INT_ERROR( "could not submit URB! Error was %d\n",(int)res ); set_host_byte(context->srb, DID_ERROR); mts_transfer_cleanup(transfer); } } static void mts_transfer_cleanup( struct urb *transfer ) /* Interrupt context! */ { MTS_INT_INIT(); if ( likely(context->final_callback != NULL) ) context->final_callback(context->srb); } static void mts_transfer_done( struct urb *transfer ) { MTS_INT_INIT(); context->srb->result &= MTS_SCSI_ERR_MASK; context->srb->result |= (unsigned)(*context->scsi_status)<<1; mts_transfer_cleanup(transfer); } static void mts_get_status( struct urb *transfer ) /* Interrupt context! */ { MTS_INT_INIT(); mts_int_submit_urb(transfer, usb_rcvbulkpipe(context->instance->usb_dev, context->instance->ep_response), context->scsi_status, 1, mts_transfer_done ); } static void mts_data_done( struct urb* transfer ) /* Interrupt context! */ { int status = transfer->status; MTS_INT_INIT(); if ( context->data_length != transfer->actual_length ) { scsi_set_resid(context->srb, context->data_length - transfer->actual_length); } else if ( unlikely(status) ) { set_host_byte(context->srb, (status == -ENOENT ? DID_ABORT : DID_ERROR)); } mts_get_status(transfer); } static void mts_command_done( struct urb *transfer ) /* Interrupt context! 
*/ { int status = transfer->status; MTS_INT_INIT(); if ( unlikely(status) ) { if (status == -ENOENT) { /* We are being killed */ MTS_DEBUG_GOT_HERE(); set_host_byte(context->srb, DID_ABORT); } else { /* A genuine error has occurred */ MTS_DEBUG_GOT_HERE(); set_host_byte(context->srb, DID_ERROR); } mts_transfer_cleanup(transfer); return; } if (context->srb->cmnd[0] == REQUEST_SENSE) { mts_int_submit_urb(transfer, context->data_pipe, context->srb->sense_buffer, context->data_length, mts_data_done); } else { if ( context->data ) { mts_int_submit_urb(transfer, context->data_pipe, context->data, context->data_length, scsi_sg_count(context->srb) > 1 ? mts_do_sg : mts_data_done); } else { mts_get_status(transfer); } } } static void mts_do_sg (struct urb* transfer) { int status = transfer->status; MTS_INT_INIT(); MTS_DEBUG("Processing fragment %d of %d\n", context->fragment, scsi_sg_count(context->srb)); if (unlikely(status)) { set_host_byte(context->srb, (status == -ENOENT ? DID_ABORT : DID_ERROR)); mts_transfer_cleanup(transfer); } context->curr_sg = sg_next(context->curr_sg); mts_int_submit_urb(transfer, context->data_pipe, sg_virt(context->curr_sg), context->curr_sg->length, sg_is_last(context->curr_sg) ? mts_data_done : mts_do_sg); } static const u8 mts_read_image_sig[] = { 0x28, 00, 00, 00 }; static const u8 mts_read_image_sig_len = 4; static const unsigned char mts_direction[256/8] = { 0x28, 0x81, 0x14, 0x14, 0x20, 0x01, 0x90, 0x77, 0x0C, 0x20, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; #define MTS_DIRECTION_IS_IN(x) ((mts_direction[x>>3] >> (x & 7)) & 1) static void mts_build_transfer_context(struct scsi_cmnd *srb, struct mts_desc* desc) { int pipe; MTS_DEBUG_GOT_HERE(); desc->context.instance = desc; desc->context.srb = srb; if (!scsi_bufflen(srb)) { desc->context.data = NULL; desc->context.data_length = 0; return; } else { desc->context.curr_sg = scsi_sglist(srb); desc->context.data = sg_virt(desc->context.curr_sg); desc->context.data_length = desc->context.curr_sg->length; } /* can't rely on srb->sc_data_direction */ /* Brutally ripped from usb-storage */ if ( !memcmp( srb->cmnd, mts_read_image_sig, mts_read_image_sig_len ) ) { pipe = usb_rcvbulkpipe(desc->usb_dev,desc->ep_image); MTS_DEBUG( "transferring from desc->ep_image == %d\n", (int)desc->ep_image ); } else if ( MTS_DIRECTION_IS_IN(srb->cmnd[0]) ) { pipe = usb_rcvbulkpipe(desc->usb_dev,desc->ep_response); MTS_DEBUG( "transferring from desc->ep_response == %d\n", (int)desc->ep_response); } else { MTS_DEBUG("transferring to desc->ep_out == %d\n", (int)desc->ep_out); pipe = usb_sndbulkpipe(desc->usb_dev,desc->ep_out); } desc->context.data_pipe = pipe; } static int mts_scsi_queuecommand_lck(struct scsi_cmnd *srb) { mts_scsi_cmnd_callback callback = scsi_done; struct mts_desc* desc = (struct mts_desc*)(srb->device->host->hostdata[0]); int res; MTS_DEBUG_GOT_HERE(); mts_show_command(srb); mts_debug_dump(desc); if ( srb->device->lun || srb->device->id || srb->device->channel ) { MTS_DEBUG("Command to LUN=%d ID=%d CHANNEL=%d from SCSI layer\n",(int)srb->device->lun,(int)srb->device->id, (int)srb->device->channel ); MTS_DEBUG("this device doesn't exist\n"); set_host_byte(srb, DID_BAD_TARGET); if(likely(callback != NULL)) callback(srb); goto out; } usb_fill_bulk_urb(desc->urb, desc->usb_dev, usb_sndbulkpipe(desc->usb_dev,desc->ep_out), srb->cmnd, srb->cmd_len, mts_command_done, &desc->context ); mts_build_transfer_context( srb, desc ); 
desc->context.final_callback = callback; /* here we need ATOMIC as we are called with the iolock */ res=usb_submit_urb(desc->urb, GFP_ATOMIC); if(unlikely(res)){ MTS_ERROR("error %d submitting URB\n",(int)res); set_host_byte(srb, DID_ERROR); if(likely(callback != NULL)) callback(srb); } out: return 0; } static DEF_SCSI_QCMD(mts_scsi_queuecommand) static const struct scsi_host_template mts_scsi_host_template = { .module = THIS_MODULE, .name = "microtekX6", .proc_name = "microtekX6", .queuecommand = mts_scsi_queuecommand, .eh_abort_handler = mts_scsi_abort, .eh_host_reset_handler = mts_scsi_host_reset, .sg_tablesize = SG_ALL, .can_queue = 1, .this_id = -1, .emulated = 1, .dma_alignment = 511, .sdev_init = mts_sdev_init, .max_sectors= 256, /* 128 K */ }; /* The entries of microtek_table must correspond, line-by-line to the entries of mts_supported_products[]. */ static const struct usb_device_id mts_usb_ids[] = { { USB_DEVICE(0x4ce, 0x0300) }, { USB_DEVICE(0x5da, 0x0094) }, { USB_DEVICE(0x5da, 0x0099) }, { USB_DEVICE(0x5da, 0x009a) }, { USB_DEVICE(0x5da, 0x00a0) }, { USB_DEVICE(0x5da, 0x00a3) }, { USB_DEVICE(0x5da, 0x80a3) }, { USB_DEVICE(0x5da, 0x80ac) }, { USB_DEVICE(0x5da, 0x00b6) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE (usb, mts_usb_ids); static int mts_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { int i; int ep_out = -1; int ep_in_set[3]; /* this will break if we have more than three endpoints which is why we check */ int *ep_in_current = ep_in_set; int err_retval = -ENOMEM; struct mts_desc * new_desc; struct usb_device *dev = interface_to_usbdev (intf); /* the current altsetting on the interface we're probing */ struct usb_host_interface *altsetting; MTS_DEBUG_GOT_HERE(); MTS_DEBUG( "usb-device descriptor at %x\n", (int)dev ); MTS_DEBUG( "product id = 0x%x, vendor id = 0x%x\n", le16_to_cpu(dev->descriptor.idProduct), le16_to_cpu(dev->descriptor.idVendor) ); MTS_DEBUG_GOT_HERE(); /* the current altsetting on the interface we're probing */ altsetting = intf->cur_altsetting; /* Check if the config is sane */ if ( altsetting->desc.bNumEndpoints != MTS_EP_TOTAL ) { MTS_WARNING( "expecting %d got %d endpoints! Bailing out.\n", (int)MTS_EP_TOTAL, (int)altsetting->desc.bNumEndpoints ); return -ENODEV; } for( i = 0; i < altsetting->desc.bNumEndpoints; i++ ) { if ((altsetting->endpoint[i].desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) != USB_ENDPOINT_XFER_BULK) { MTS_WARNING( "can only deal with bulk endpoints; endpoint %d is not bulk.\n", (int)altsetting->endpoint[i].desc.bEndpointAddress ); } else { if (altsetting->endpoint[i].desc.bEndpointAddress & USB_DIR_IN) *ep_in_current++ = altsetting->endpoint[i].desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK; else { if ( ep_out != -1 ) { MTS_WARNING( "can only deal with one output endpoints. Bailing out." ); return -ENODEV; } ep_out = altsetting->endpoint[i].desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK; } } } if (ep_in_current != &ep_in_set[2]) { MTS_WARNING("couldn't find two input bulk endpoints. Bailing out.\n"); return -ENODEV; } if ( ep_out == -1 ) { MTS_WARNING( "couldn't find an output bulk endpoint. 
Bailing out.\n" ); return -ENODEV; } new_desc = kzalloc(sizeof(struct mts_desc), GFP_KERNEL); if (!new_desc) goto out; new_desc->urb = usb_alloc_urb(0, GFP_KERNEL); if (!new_desc->urb) goto out_kfree; new_desc->context.scsi_status = kmalloc(1, GFP_KERNEL); if (!new_desc->context.scsi_status) goto out_free_urb; new_desc->usb_dev = dev; new_desc->usb_intf = intf; /* endpoints */ new_desc->ep_out = ep_out; new_desc->ep_response = ep_in_set[0]; new_desc->ep_image = ep_in_set[1]; if ( new_desc->ep_out != MTS_EP_OUT ) MTS_WARNING( "will this work? Command EP is not usually %d\n", (int)new_desc->ep_out ); if ( new_desc->ep_response != MTS_EP_RESPONSE ) MTS_WARNING( "will this work? Response EP is not usually %d\n", (int)new_desc->ep_response ); if ( new_desc->ep_image != MTS_EP_IMAGE ) MTS_WARNING( "will this work? Image data EP is not usually %d\n", (int)new_desc->ep_image ); new_desc->host = scsi_host_alloc(&mts_scsi_host_template, sizeof(new_desc)); if (!new_desc->host) goto out_kfree2; new_desc->host->hostdata[0] = (unsigned long)new_desc; if (scsi_add_host(new_desc->host, &dev->dev)) { err_retval = -EIO; goto out_host_put; } scsi_scan_host(new_desc->host); usb_set_intfdata(intf, new_desc); return 0; out_host_put: scsi_host_put(new_desc->host); out_kfree2: kfree(new_desc->context.scsi_status); out_free_urb: usb_free_urb(new_desc->urb); out_kfree: kfree(new_desc); out: return err_retval; } static void mts_usb_disconnect (struct usb_interface *intf) { struct mts_desc *desc = usb_get_intfdata(intf); usb_set_intfdata(intf, NULL); usb_kill_urb(desc->urb); scsi_remove_host(desc->host); scsi_host_put(desc->host); usb_free_urb(desc->urb); kfree(desc->context.scsi_status); kfree(desc); } module_usb_driver(mts_usb_driver); MODULE_AUTHOR( DRIVER_AUTHOR ); MODULE_DESCRIPTION( DRIVER_DESC ); MODULE_LICENSE("GPL");
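/*
 * Editorial worked example (not part of this driver): how the
 * MTS_DIRECTION_IS_IN() bitmap defined above classifies a command, using
 * INQUIRY (opcode 0x12) as the sample:
 *
 *   mts_direction[0x12 >> 3] == mts_direction[2] == 0x14
 *   (0x14 >> (0x12 & 7)) & 1 == (0x14 >> 2) & 1 == 1
 *
 * so INQUIRY is treated as a device-to-host transfer and its data is read
 * from the response endpoint in mts_build_transfer_context(), rather than
 * written to the command endpoint.
 */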
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SIGNAL_H #define _LINUX_SIGNAL_H #include <linux/bug.h> #include <linux/list.h> #include <linux/signal_types.h> #include <linux/string.h> struct task_struct; /* for sysctl */ extern int print_fatal_signals; static inline void copy_siginfo(kernel_siginfo_t *to, const kernel_siginfo_t *from) { memcpy(to, from, sizeof(*to)); } static inline void clear_siginfo(kernel_siginfo_t *info) { memset(info, 0, sizeof(*info)); } #define SI_EXPANSION_SIZE (sizeof(struct siginfo) - sizeof(struct kernel_siginfo)) static inline void copy_siginfo_to_external(siginfo_t *to, const kernel_siginfo_t *from) { memcpy(to, from, sizeof(*from)); memset(((char *)to) + sizeof(struct kernel_siginfo), 0, SI_EXPANSION_SIZE); } int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from); int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from); enum siginfo_layout { SIL_KILL, SIL_TIMER, SIL_POLL, SIL_FAULT, SIL_FAULT_TRAPNO, SIL_FAULT_MCEERR, SIL_FAULT_BNDERR, SIL_FAULT_PKUERR, SIL_FAULT_PERF_EVENT, SIL_CHLD, SIL_RT, SIL_SYS, }; enum siginfo_layout siginfo_layout(unsigned sig, int si_code); /* * Define some primitives to manipulate sigset_t. */ #ifndef __HAVE_ARCH_SIG_BITOPS #include <linux/bitops.h> /* We don't use <linux/bitops.h> for these because there is no need to be atomic.
*/ static inline void sigaddset(sigset_t *set, int _sig) { unsigned long sig = _sig - 1; if (_NSIG_WORDS == 1) set->sig[0] |= 1UL << sig; else set->sig[sig / _NSIG_BPW] |= 1UL << (sig % _NSIG_BPW); } static inline void sigdelset(sigset_t *set, int _sig) { unsigned long sig = _sig - 1; if (_NSIG_WORDS == 1) set->sig[0] &= ~(1UL << sig); else set->sig[sig / _NSIG_BPW] &= ~(1UL << (sig % _NSIG_BPW)); } static inline int sigismember(sigset_t *set, int _sig) { unsigned long sig = _sig - 1; if (_NSIG_WORDS == 1) return 1 & (set->sig[0] >> sig); else return 1 & (set->sig[sig / _NSIG_BPW] >> (sig % _NSIG_BPW)); } #endif /* __HAVE_ARCH_SIG_BITOPS */ static inline int sigisemptyset(sigset_t *set) { switch (_NSIG_WORDS) { case 4: return (set->sig[3] | set->sig[2] | set->sig[1] | set->sig[0]) == 0; case 2: return (set->sig[1] | set->sig[0]) == 0; case 1: return set->sig[0] == 0; default: BUILD_BUG(); return 0; } } static inline int sigequalsets(const sigset_t *set1, const sigset_t *set2) { switch (_NSIG_WORDS) { case 4: return (set1->sig[3] == set2->sig[3]) && (set1->sig[2] == set2->sig[2]) && (set1->sig[1] == set2->sig[1]) && (set1->sig[0] == set2->sig[0]); case 2: return (set1->sig[1] == set2->sig[1]) && (set1->sig[0] == set2->sig[0]); case 1: return set1->sig[0] == set2->sig[0]; } return 0; } #define sigmask(sig) (1UL << ((sig) - 1)) #ifndef __HAVE_ARCH_SIG_SETOPS #define _SIG_SET_BINOP(name, op) \ static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \ { \ unsigned long a0, a1, a2, a3, b0, b1, b2, b3; \ \ switch (_NSIG_WORDS) { \ case 4: \ a3 = a->sig[3]; a2 = a->sig[2]; \ b3 = b->sig[3]; b2 = b->sig[2]; \ r->sig[3] = op(a3, b3); \ r->sig[2] = op(a2, b2); \ fallthrough; \ case 2: \ a1 = a->sig[1]; b1 = b->sig[1]; \ r->sig[1] = op(a1, b1); \ fallthrough; \ case 1: \ a0 = a->sig[0]; b0 = b->sig[0]; \ r->sig[0] = op(a0, b0); \ break; \ default: \ BUILD_BUG(); \ } \ } #define _sig_or(x,y) ((x) | (y)) _SIG_SET_BINOP(sigorsets, _sig_or) #define _sig_and(x,y) ((x) & (y)) _SIG_SET_BINOP(sigandsets, _sig_and) #define _sig_andn(x,y) ((x) & ~(y)) _SIG_SET_BINOP(sigandnsets, _sig_andn) #undef _SIG_SET_BINOP #undef _sig_or #undef _sig_and #undef _sig_andn #define _SIG_SET_OP(name, op) \ static inline void name(sigset_t *set) \ { \ switch (_NSIG_WORDS) { \ case 4: set->sig[3] = op(set->sig[3]); \ set->sig[2] = op(set->sig[2]); \ fallthrough; \ case 2: set->sig[1] = op(set->sig[1]); \ fallthrough; \ case 1: set->sig[0] = op(set->sig[0]); \ break; \ default: \ BUILD_BUG(); \ } \ } #define _sig_not(x) (~(x)) _SIG_SET_OP(signotset, _sig_not) #undef _SIG_SET_OP #undef _sig_not static inline void sigemptyset(sigset_t *set) { switch (_NSIG_WORDS) { default: memset(set, 0, sizeof(sigset_t)); break; case 2: set->sig[1] = 0; fallthrough; case 1: set->sig[0] = 0; break; } } static inline void sigfillset(sigset_t *set) { switch (_NSIG_WORDS) { default: memset(set, -1, sizeof(sigset_t)); break; case 2: set->sig[1] = -1; fallthrough; case 1: set->sig[0] = -1; break; } } /* Some extensions for manipulating the low 32 signals in particular. 
*/ static inline void sigaddsetmask(sigset_t *set, unsigned long mask) { set->sig[0] |= mask; } static inline void sigdelsetmask(sigset_t *set, unsigned long mask) { set->sig[0] &= ~mask; } static inline int sigtestsetmask(sigset_t *set, unsigned long mask) { return (set->sig[0] & mask) != 0; } static inline void siginitset(sigset_t *set, unsigned long mask) { set->sig[0] = mask; switch (_NSIG_WORDS) { default: memset(&set->sig[1], 0, sizeof(long)*(_NSIG_WORDS-1)); break; case 2: set->sig[1] = 0; break; case 1: ; } } static inline void siginitsetinv(sigset_t *set, unsigned long mask) { set->sig[0] = ~mask; switch (_NSIG_WORDS) { default: memset(&set->sig[1], -1, sizeof(long)*(_NSIG_WORDS-1)); break; case 2: set->sig[1] = -1; break; case 1: ; } } #endif /* __HAVE_ARCH_SIG_SETOPS */ static inline void init_sigpending(struct sigpending *sig) { sigemptyset(&sig->signal); INIT_LIST_HEAD(&sig->list); } extern void flush_sigqueue(struct sigpending *queue); /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ static inline int valid_signal(unsigned long sig) { return sig <= _NSIG ? 1 : 0; } struct timespec; struct pt_regs; enum pid_type; extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int send_signal_locked(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; extern bool get_signal(struct ksignal *ksig); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); extern void kernel_sigaction(int, __sighandler_t); #define SIG_KTHREAD ((__force __sighandler_t)2) #define SIG_KTHREAD_KERNEL ((__force __sighandler_t)3) static inline void allow_signal(int sig) { /* * Kernel threads handle their own signals. Let the signal code * know it'll be handled, so that they don't get converted to * SIGKILL or just silently dropped. */ kernel_sigaction(sig, SIG_KTHREAD); } static inline void allow_kernel_signal(int sig) { /* * Kernel threads handle their own signals. Let the signal code * know signals sent by the kernel will be handled, so that they * don't get silently dropped. */ kernel_sigaction(sig, SIG_KTHREAD_KERNEL); } static inline void disallow_signal(int sig) { kernel_sigaction(sig, SIG_IGN); } extern struct kmem_cache *sighand_cachep; extern bool unhandled_signal(struct task_struct *tsk, int sig); /* * In POSIX a signal is sent either to a specific thread (Linux task) * or to the process as a whole (Linux thread group). How the signal * is sent determines whether it's to one thread or the whole group, * which determines which signal mask(s) are involved in blocking it * from being delivered until later. When the signal is delivered, * either it's caught or ignored by a user handler or it has a default * effect that applies to the whole thread group (POSIX process). * * The possible effects an unblocked signal set to SIG_DFL can have are: * ignore - Nothing Happens * terminate - kill the process, i.e. all threads in the group, * similar to exit_group. The group leader (only) reports * WIFSIGNALED status to its parent. 
 * coredump - write a core dump file describing all threads using * the same mm and then kill all those threads * stop - stop all the threads in the group, i.e. TASK_STOPPED state * * SIGKILL and SIGSTOP cannot be caught, blocked, or ignored. * Other signals when not blocked and set to SIG_DFL behave as follows. * The job control signals also have other special effects. * * +--------------------+------------------+ * | POSIX signal | default action | * +--------------------+------------------+ * | SIGHUP | terminate | * | SIGINT | terminate | * | SIGQUIT | coredump | * | SIGILL | coredump | * | SIGTRAP | coredump | * | SIGABRT/SIGIOT | coredump | * | SIGBUS | coredump | * | SIGFPE | coredump | * | SIGKILL | terminate(+) | * | SIGUSR1 | terminate | * | SIGSEGV | coredump | * | SIGUSR2 | terminate | * | SIGPIPE | terminate | * | SIGALRM | terminate | * | SIGTERM | terminate | * | SIGCHLD | ignore | * | SIGCONT | ignore(*) | * | SIGSTOP | stop(*)(+) | * | SIGTSTP | stop(*) | * | SIGTTIN | stop(*) | * | SIGTTOU | stop(*) | * | SIGURG | ignore | * | SIGXCPU | coredump | * | SIGXFSZ | coredump | * | SIGVTALRM | terminate | * | SIGPROF | terminate | * | SIGPOLL/SIGIO | terminate | * | SIGSYS/SIGUNUSED | coredump | * | SIGSTKFLT | terminate | * | SIGWINCH | ignore | * | SIGPWR | terminate | * | SIGRTMIN-SIGRTMAX | terminate | * +--------------------+------------------+ * | non-POSIX signal | default action | * +--------------------+------------------+ * | SIGEMT | coredump | * +--------------------+------------------+ * * (+) For SIGKILL and SIGSTOP the action is "always", not just "default". * (*) Special job control effects: * When SIGCONT is sent, it resumes the process (all threads in the group) * from TASK_STOPPED state and also clears any pending/queued stop signals * (any of those marked with "stop(*)"). This happens regardless of blocking, * catching, or ignoring SIGCONT. When any stop signal is sent, it clears * any pending/queued SIGCONT signals; this happens regardless of blocking, * catching, or ignoring the stop signal, though (except for SIGSTOP) the * default action of stopping the process may happen later or never. 
*/ #ifdef SIGEMT #define SIGEMT_MASK rt_sigmask(SIGEMT) #else #define SIGEMT_MASK 0 #endif #if SIGRTMIN > BITS_PER_LONG #define rt_sigmask(sig) (1ULL << ((sig)-1)) #else #define rt_sigmask(sig) sigmask(sig) #endif #define siginmask(sig, mask) \ ((sig) > 0 && (sig) < SIGRTMIN && (rt_sigmask(sig) & (mask))) #define SIG_KERNEL_ONLY_MASK (\ rt_sigmask(SIGKILL) | rt_sigmask(SIGSTOP)) #define SIG_KERNEL_STOP_MASK (\ rt_sigmask(SIGSTOP) | rt_sigmask(SIGTSTP) | \ rt_sigmask(SIGTTIN) | rt_sigmask(SIGTTOU) ) #define SIG_KERNEL_COREDUMP_MASK (\ rt_sigmask(SIGQUIT) | rt_sigmask(SIGILL) | \ rt_sigmask(SIGTRAP) | rt_sigmask(SIGABRT) | \ rt_sigmask(SIGFPE) | rt_sigmask(SIGSEGV) | \ rt_sigmask(SIGBUS) | rt_sigmask(SIGSYS) | \ rt_sigmask(SIGXCPU) | rt_sigmask(SIGXFSZ) | \ SIGEMT_MASK ) #define SIG_KERNEL_IGNORE_MASK (\ rt_sigmask(SIGCONT) | rt_sigmask(SIGCHLD) | \ rt_sigmask(SIGWINCH) | rt_sigmask(SIGURG) ) #define SIG_SPECIFIC_SICODES_MASK (\ rt_sigmask(SIGILL) | rt_sigmask(SIGFPE) | \ rt_sigmask(SIGSEGV) | rt_sigmask(SIGBUS) | \ rt_sigmask(SIGTRAP) | rt_sigmask(SIGCHLD) | \ rt_sigmask(SIGPOLL) | rt_sigmask(SIGSYS) | \ SIGEMT_MASK ) #define sig_kernel_only(sig) siginmask(sig, SIG_KERNEL_ONLY_MASK) #define sig_kernel_coredump(sig) siginmask(sig, SIG_KERNEL_COREDUMP_MASK) #define sig_kernel_ignore(sig) siginmask(sig, SIG_KERNEL_IGNORE_MASK) #define sig_kernel_stop(sig) siginmask(sig, SIG_KERNEL_STOP_MASK) #define sig_specific_sicodes(sig) siginmask(sig, SIG_SPECIFIC_SICODES_MASK) #define sig_fatal(t, signr) \ (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL) void signals_init(void); int restore_altstack(const stack_t __user *); int __save_altstack(stack_t __user *, unsigned long); #define unsafe_save_altstack(uss, sp, label) do { \ stack_t __user *__uss = uss; \ struct task_struct *t = current; \ unsafe_put_user((void __user *)t->sas_ss_sp, &__uss->ss_sp, label); \ unsafe_put_user(t->sas_ss_flags, &__uss->ss_flags, label); \ unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \ } while (0); #ifdef CONFIG_DYNAMIC_SIGFRAME bool sigaltstack_size_valid(size_t ss_size); #else static inline bool sigaltstack_size_valid(size_t size) { return true; } #endif /* !CONFIG_DYNAMIC_SIGFRAME */ #ifdef CONFIG_PROC_FS struct seq_file; extern void render_sigset_t(struct seq_file *, const char *, sigset_t *); #endif #ifndef arch_untagged_si_addr /* * Given a fault address and a signal and si_code which correspond to the * _sigfault union member, returns the address that must appear in si_addr if * the signal handler does not have SA_EXPOSE_TAGBITS enabled in sa_flags. */ static inline void __user *arch_untagged_si_addr(void __user *addr, unsigned long sig, unsigned long si_code) { return addr; } #endif #endif /* _LINUX_SIGNAL_H */
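/*
 * Illustrative sketch (not part of the original header), showing how the
 * sigset helpers and the SIG_KERNEL_* classification macros above compose.
 * The function name is hypothetical; sigemptyset(), sigaddset(),
 * sigismember(), sig_kernel_stop() and SIGRTMIN all come from the
 * declarations in this header.
 */
static inline int example_stop_signals_in_mask(void)
{
	sigset_t mask;
	int sig;

	/* start from the empty set and add every job-control stop signal */
	sigemptyset(&mask);
	for (sig = 1; sig < SIGRTMIN; sig++)
		if (sig_kernel_stop(sig))
			sigaddset(&mask, sig);

	/* SIGTSTP is listed as stop(*) in the table above, so it is a member */
	return sigismember(&mask, SIGTSTP);
}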
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Symmetric key ciphers. * * Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_SKCIPHER_H #define _CRYPTO_SKCIPHER_H #include <linux/atomic.h> #include <linux/container_of.h> #include <linux/crypto.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/types.h> /* Set this bit if the lskcipher operation is a continuation. */ #define CRYPTO_LSKCIPHER_FLAG_CONT 0x00000001 /* Set this bit if the lskcipher operation is final. */ #define CRYPTO_LSKCIPHER_FLAG_FINAL 0x00000002 /* The bit CRYPTO_TFM_REQ_MAY_SLEEP can also be set if needed. */ /* Set this bit if the skcipher operation is a continuation. */ #define CRYPTO_SKCIPHER_REQ_CONT 0x00000001 /* Set this bit if the skcipher operation is not final. */ #define CRYPTO_SKCIPHER_REQ_NOTFINAL 0x00000002 struct scatterlist; /** * struct skcipher_request - Symmetric key cipher request * @cryptlen: Number of bytes to encrypt or decrypt * @iv: Initialisation Vector * @src: Source SG list * @dst: Destination SG list * @base: Underlying async request * @__ctx: Start of private context data */ struct skcipher_request { unsigned int cryptlen; u8 *iv; struct scatterlist *src; struct scatterlist *dst; struct crypto_async_request base; void *__ctx[] CRYPTO_MINALIGN_ATTR; }; struct crypto_skcipher { unsigned int reqsize; struct crypto_tfm base; }; struct crypto_sync_skcipher { struct crypto_skcipher base; }; struct crypto_lskcipher { struct crypto_tfm base; }; /* * struct skcipher_alg_common - common properties of skcipher_alg * @min_keysize: Minimum key size supported by the transformation. This is the * smallest key length supported by this transformation algorithm. * This must be set to one of the pre-defined values as this is * not hardware specific. Possible values for this field can be * found via git grep "_MIN_KEY_SIZE" include/crypto/ * @max_keysize: Maximum key size supported by the transformation. This is the * largest key length supported by this transformation algorithm. * This must be set to one of the pre-defined values as this is * not hardware specific. Possible values for this field can be * found via git grep "_MAX_KEY_SIZE" include/crypto/ * @ivsize: IV size applicable for transformation. The consumer must provide an * IV of exactly that size to perform the encrypt or decrypt operation. * @chunksize: Equal to the block size except for stream ciphers such as * CTR where it is set to the underlying block size. * @statesize: Size of the internal state for the algorithm. * @base: Definition of a generic crypto algorithm. */ #define SKCIPHER_ALG_COMMON { \ unsigned int min_keysize; \ unsigned int max_keysize; \ unsigned int ivsize; \ unsigned int chunksize; \ unsigned int statesize; \ \ struct crypto_alg base; \ } struct skcipher_alg_common SKCIPHER_ALG_COMMON; /** * struct skcipher_alg - symmetric key cipher definition * @setkey: Set key for the transformation. This function is used to either * program a supplied key into the hardware or store the key in the * transformation context for programming it later. Note that this * function does modify the transformation context. This function can * be called multiple times during the existence of the transformation * object, so one must make sure the key is properly reprogrammed into * the hardware. This function is also responsible for checking the key * length for validity. 
In case a software fallback was put in place in * the @cra_init call, this function might need to use the fallback if * the algorithm doesn't support all of the key sizes. * @encrypt: Encrypt a scatterlist of blocks. This function is used to encrypt * the supplied scatterlist containing the blocks of data. The crypto * API consumer is responsible for aligning the entries of the * scatterlist properly and making sure the chunks are correctly * sized. In case a software fallback was put in place in the * @cra_init call, this function might need to use the fallback if * the algorithm doesn't support all of the key sizes. In case the * key was stored in transformation context, the key might need to be * re-programmed into the hardware in this function. This function * shall not modify the transformation context, as this function may * be called in parallel with the same transformation object. * @decrypt: Decrypt a single block. This is a reverse counterpart to @encrypt * and the conditions are exactly the same. * @export: Export partial state of the transformation. This function dumps the * entire state of the ongoing transformation into a provided block of * data so it can be @import 'ed back later on. This is useful in case * you want to save partial result of the transformation after * processing certain amount of data and reload this partial result * multiple times later on for multiple re-use. No data processing * happens at this point. * @import: Import partial state of the transformation. This function loads the * entire state of the ongoing transformation from a provided block of * data so the transformation can continue from this point onward. No * data processing happens at this point. * @init: Initialize the cryptographic transformation object. This function * is used to initialize the cryptographic transformation object. * This function is called only once at the instantiation time, right * after the transformation context was allocated. In case the * cryptographic hardware has some special requirements which need to * be handled by software, this function shall check for the precise * requirement of the transformation and put any software fallbacks * in place. * @exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @init, used to remove various changes set in * @init. * @walksize: Equal to the chunk size except in cases where the algorithm is * considerably more efficient if it can operate on multiple chunks * in parallel. Should be a multiple of chunksize. * @co: see struct skcipher_alg_common * * All fields except @ivsize are mandatory and must be filled. */ struct skcipher_alg { int (*setkey)(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); int (*encrypt)(struct skcipher_request *req); int (*decrypt)(struct skcipher_request *req); int (*export)(struct skcipher_request *req, void *out); int (*import)(struct skcipher_request *req, const void *in); int (*init)(struct crypto_skcipher *tfm); void (*exit)(struct crypto_skcipher *tfm); unsigned int walksize; union { struct SKCIPHER_ALG_COMMON; struct skcipher_alg_common co; }; }; /** * struct lskcipher_alg - linear symmetric key cipher definition * @setkey: Set key for the transformation. This function is used to either * program a supplied key into the hardware or store the key in the * transformation context for programming it later. Note that this * function does modify the transformation context. 
This function can * be called multiple times during the existence of the transformation * object, so one must make sure the key is properly reprogrammed into * the hardware. This function is also responsible for checking the key * length for validity. In case a software fallback was put in place in * the @cra_init call, this function might need to use the fallback if * the algorithm doesn't support all of the key sizes. * @encrypt: Encrypt a number of bytes. This function is used to encrypt * the supplied data. This function shall not modify * the transformation context, as this function may be called * in parallel with the same transformation object. Data * may be left over if length is not a multiple of blocks * and there is more to come (final == false). The number of * left-over bytes should be returned in case of success. * The siv field shall be as long as ivsize + statesize with * the IV placed at the front. The state will be used by the * algorithm internally. * @decrypt: Decrypt a number of bytes. This is a reverse counterpart to * @encrypt and the conditions are exactly the same. * @init: Initialize the cryptographic transformation object. This function * is used to initialize the cryptographic transformation object. * This function is called only once at the instantiation time, right * after the transformation context was allocated. * @exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @init, used to remove various changes set in * @init. * @co: see struct skcipher_alg_common */ struct lskcipher_alg { int (*setkey)(struct crypto_lskcipher *tfm, const u8 *key, unsigned int keylen); int (*encrypt)(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *siv, u32 flags); int (*decrypt)(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *siv, u32 flags); int (*init)(struct crypto_lskcipher *tfm); void (*exit)(struct crypto_lskcipher *tfm); struct skcipher_alg_common co; }; #define MAX_SYNC_SKCIPHER_REQSIZE 384 /* * This performs a type-check against the "_tfm" argument to make sure * all users have the correct skcipher tfm for doing on-stack requests. */ #define SYNC_SKCIPHER_REQUEST_ON_STACK(name, _tfm) \ char __##name##_desc[sizeof(struct skcipher_request) + \ MAX_SYNC_SKCIPHER_REQSIZE \ ] CRYPTO_MINALIGN_ATTR; \ struct skcipher_request *name = \ (((struct skcipher_request *)__##name##_desc)->base.tfm = \ crypto_sync_skcipher_tfm((_tfm)), \ (void *)__##name##_desc) /** * DOC: Symmetric Key Cipher API * * Symmetric key cipher API is used with the ciphers of type * CRYPTO_ALG_TYPE_SKCIPHER (listed as type "skcipher" in /proc/crypto). * * Asynchronous cipher operations imply that the function invocation for a * cipher request returns immediately before the completion of the operation. * The cipher request is scheduled as a separate kernel thread and therefore * load-balanced on the different CPUs via the process scheduler. To allow * the kernel crypto API to inform the caller about the completion of a cipher * request, the caller must provide a callback function. That function is * invoked with the cipher handle when the request completes. * * To support the asynchronous operation, additional information than just the * cipher handle must be supplied to the kernel crypto API. That additional * information is given by filling in the skcipher_request data structure. * * For the symmetric key cipher API, the state is maintained with the tfm * cipher handle. 
A single tfm can be used across multiple calls and in * parallel. For asynchronous block cipher calls, context data supplied and * only used by the caller can be referenced the request data structure in * addition to the IV used for the cipher request. The maintenance of such * state information would be important for a crypto driver implementer to * have, because when calling the callback function upon completion of the * cipher operation, that callback function may need some information about * which operation just finished if it invoked multiple in parallel. This * state information is unused by the kernel crypto API. */ static inline struct crypto_skcipher *__crypto_skcipher_cast( struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_skcipher, base); } /** * crypto_alloc_skcipher() - allocate symmetric key cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * skcipher cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for an skcipher. The returned struct * crypto_skcipher is the cipher handle that is required for any subsequent * API invocation for that skcipher. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, u32 type, u32 mask); struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(const char *alg_name, u32 type, u32 mask); /** * crypto_alloc_lskcipher() - allocate linear symmetric key cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * lskcipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for an lskcipher. The returned struct * crypto_lskcipher is the cipher handle that is required for any subsequent * API invocation for that lskcipher. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ struct crypto_lskcipher *crypto_alloc_lskcipher(const char *alg_name, u32 type, u32 mask); static inline struct crypto_tfm *crypto_skcipher_tfm( struct crypto_skcipher *tfm) { return &tfm->base; } static inline struct crypto_tfm *crypto_lskcipher_tfm( struct crypto_lskcipher *tfm) { return &tfm->base; } static inline struct crypto_tfm *crypto_sync_skcipher_tfm( struct crypto_sync_skcipher *tfm) { return crypto_skcipher_tfm(&tfm->base); } /** * crypto_free_skcipher() - zeroize and free cipher handle * @tfm: cipher handle to be freed * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_skcipher(struct crypto_skcipher *tfm) { crypto_destroy_tfm(tfm, crypto_skcipher_tfm(tfm)); } static inline void crypto_free_sync_skcipher(struct crypto_sync_skcipher *tfm) { crypto_free_skcipher(&tfm->base); } /** * crypto_free_lskcipher() - zeroize and free cipher handle * @tfm: cipher handle to be freed * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_lskcipher(struct crypto_lskcipher *tfm) { crypto_destroy_tfm(tfm, crypto_lskcipher_tfm(tfm)); } /** * crypto_has_skcipher() - Search for the availability of an skcipher. 
* @alg_name: is the cra_name / name or cra_driver_name / driver name of the * skcipher * @type: specifies the type of the skcipher * @mask: specifies the mask for the skcipher * * Return: true when the skcipher is known to the kernel crypto API; false * otherwise */ int crypto_has_skcipher(const char *alg_name, u32 type, u32 mask); static inline const char *crypto_skcipher_driver_name( struct crypto_skcipher *tfm) { return crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)); } static inline const char *crypto_lskcipher_driver_name( struct crypto_lskcipher *tfm) { return crypto_tfm_alg_driver_name(crypto_lskcipher_tfm(tfm)); } static inline struct skcipher_alg_common *crypto_skcipher_alg_common( struct crypto_skcipher *tfm) { return container_of(crypto_skcipher_tfm(tfm)->__crt_alg, struct skcipher_alg_common, base); } static inline struct skcipher_alg *crypto_skcipher_alg( struct crypto_skcipher *tfm) { return container_of(crypto_skcipher_tfm(tfm)->__crt_alg, struct skcipher_alg, base); } static inline struct lskcipher_alg *crypto_lskcipher_alg( struct crypto_lskcipher *tfm) { return container_of(crypto_lskcipher_tfm(tfm)->__crt_alg, struct lskcipher_alg, co.base); } /** * crypto_skcipher_ivsize() - obtain IV size * @tfm: cipher handle * * The size of the IV for the skcipher referenced by the cipher handle is * returned. This IV size may be zero if the cipher does not need an IV. * * Return: IV size in bytes */ static inline unsigned int crypto_skcipher_ivsize(struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->ivsize; } static inline unsigned int crypto_sync_skcipher_ivsize( struct crypto_sync_skcipher *tfm) { return crypto_skcipher_ivsize(&tfm->base); } /** * crypto_lskcipher_ivsize() - obtain IV size * @tfm: cipher handle * * The size of the IV for the lskcipher referenced by the cipher handle is * returned. This IV size may be zero if the cipher does not need an IV. * * Return: IV size in bytes */ static inline unsigned int crypto_lskcipher_ivsize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.ivsize; } /** * crypto_skcipher_blocksize() - obtain block size of cipher * @tfm: cipher handle * * The block size for the skcipher referenced with the cipher handle is * returned. The caller may use that information to allocate appropriate * memory for the data returned by the encryption or decryption operation * * Return: block size of cipher */ static inline unsigned int crypto_skcipher_blocksize( struct crypto_skcipher *tfm) { return crypto_tfm_alg_blocksize(crypto_skcipher_tfm(tfm)); } /** * crypto_lskcipher_blocksize() - obtain block size of cipher * @tfm: cipher handle * * The block size for the lskcipher referenced with the cipher handle is * returned. The caller may use that information to allocate appropriate * memory for the data returned by the encryption or decryption operation * * Return: block size of cipher */ static inline unsigned int crypto_lskcipher_blocksize( struct crypto_lskcipher *tfm) { return crypto_tfm_alg_blocksize(crypto_lskcipher_tfm(tfm)); } /** * crypto_skcipher_chunksize() - obtain chunk size * @tfm: cipher handle * * The block size is set to one for ciphers such as CTR. However, * you still need to provide incremental updates in multiples of * the underlying block size as the IV does not have sub-block * granularity. This is known in this API as the chunk size. 
* * Return: chunk size in bytes */ static inline unsigned int crypto_skcipher_chunksize( struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->chunksize; } /** * crypto_lskcipher_chunksize() - obtain chunk size * @tfm: cipher handle * * The block size is set to one for ciphers such as CTR. However, * you still need to provide incremental updates in multiples of * the underlying block size as the IV does not have sub-block * granularity. This is known in this API as the chunk size. * * Return: chunk size in bytes */ static inline unsigned int crypto_lskcipher_chunksize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.chunksize; } /** * crypto_skcipher_statesize() - obtain state size * @tfm: cipher handle * * Some algorithms cannot be chained with the IV alone. They carry * internal state which must be replicated if data is to be processed * incrementally. The size of that state can be obtained with this * function. * * Return: state size in bytes */ static inline unsigned int crypto_skcipher_statesize( struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->statesize; } /** * crypto_lskcipher_statesize() - obtain state size * @tfm: cipher handle * * Some algorithms cannot be chained with the IV alone. They carry * internal state which must be replicated if data is to be processed * incrementally. The size of that state can be obtained with this * function. * * Return: state size in bytes */ static inline unsigned int crypto_lskcipher_statesize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.statesize; } static inline unsigned int crypto_sync_skcipher_blocksize( struct crypto_sync_skcipher *tfm) { return crypto_skcipher_blocksize(&tfm->base); } static inline unsigned int crypto_skcipher_alignmask( struct crypto_skcipher *tfm) { return crypto_tfm_alg_alignmask(crypto_skcipher_tfm(tfm)); } static inline unsigned int crypto_lskcipher_alignmask( struct crypto_lskcipher *tfm) { return crypto_tfm_alg_alignmask(crypto_lskcipher_tfm(tfm)); } static inline u32 crypto_skcipher_get_flags(struct crypto_skcipher *tfm) { return crypto_tfm_get_flags(crypto_skcipher_tfm(tfm)); } static inline void crypto_skcipher_set_flags(struct crypto_skcipher *tfm, u32 flags) { crypto_tfm_set_flags(crypto_skcipher_tfm(tfm), flags); } static inline void crypto_skcipher_clear_flags(struct crypto_skcipher *tfm, u32 flags) { crypto_tfm_clear_flags(crypto_skcipher_tfm(tfm), flags); } static inline u32 crypto_sync_skcipher_get_flags( struct crypto_sync_skcipher *tfm) { return crypto_skcipher_get_flags(&tfm->base); } static inline void crypto_sync_skcipher_set_flags( struct crypto_sync_skcipher *tfm, u32 flags) { crypto_skcipher_set_flags(&tfm->base, flags); } static inline void crypto_sync_skcipher_clear_flags( struct crypto_sync_skcipher *tfm, u32 flags) { crypto_skcipher_clear_flags(&tfm->base, flags); } static inline u32 crypto_lskcipher_get_flags(struct crypto_lskcipher *tfm) { return crypto_tfm_get_flags(crypto_lskcipher_tfm(tfm)); } static inline void crypto_lskcipher_set_flags(struct crypto_lskcipher *tfm, u32 flags) { crypto_tfm_set_flags(crypto_lskcipher_tfm(tfm), flags); } static inline void crypto_lskcipher_clear_flags(struct crypto_lskcipher *tfm, u32 flags) { crypto_tfm_clear_flags(crypto_lskcipher_tfm(tfm), flags); } /** * crypto_skcipher_setkey() - set key for cipher * @tfm: cipher handle * @key: buffer holding the key * @keylen: length of the key in bytes * * The caller provided key is set for the skcipher referenced by the cipher * 
handle. * * Note, the key length determines the cipher type. Many block ciphers implement * different cipher modes depending on the key size, such as AES-128 vs AES-192 * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 * is performed. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); static inline int crypto_sync_skcipher_setkey(struct crypto_sync_skcipher *tfm, const u8 *key, unsigned int keylen) { return crypto_skcipher_setkey(&tfm->base, key, keylen); } /** * crypto_lskcipher_setkey() - set key for cipher * @tfm: cipher handle * @key: buffer holding the key * @keylen: length of the key in bytes * * The caller provided key is set for the lskcipher referenced by the cipher * handle. * * Note, the key length determines the cipher type. Many block ciphers implement * different cipher modes depending on the key size, such as AES-128 vs AES-192 * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 * is performed. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_lskcipher_setkey(struct crypto_lskcipher *tfm, const u8 *key, unsigned int keylen); static inline unsigned int crypto_skcipher_min_keysize( struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->min_keysize; } static inline unsigned int crypto_skcipher_max_keysize( struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->max_keysize; } static inline unsigned int crypto_lskcipher_min_keysize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.min_keysize; } static inline unsigned int crypto_lskcipher_max_keysize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.max_keysize; } /** * crypto_skcipher_reqtfm() - obtain cipher handle from request * @req: skcipher_request out of which the cipher handle is to be obtained * * Return the crypto_skcipher handle when furnishing an skcipher_request * data structure. * * Return: crypto_skcipher handle */ static inline struct crypto_skcipher *crypto_skcipher_reqtfm( struct skcipher_request *req) { return __crypto_skcipher_cast(req->base.tfm); } static inline struct crypto_sync_skcipher *crypto_sync_skcipher_reqtfm( struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); return container_of(tfm, struct crypto_sync_skcipher, base); } /** * crypto_skcipher_encrypt() - encrypt plaintext * @req: reference to the skcipher_request handle that holds all information * needed to perform the cipher operation * * Encrypt plaintext data using the skcipher_request handle. That data * structure and how it is filled with data is discussed with the * skcipher_request_* functions. * * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_skcipher_encrypt(struct skcipher_request *req); /** * crypto_skcipher_decrypt() - decrypt ciphertext * @req: reference to the skcipher_request handle that holds all information * needed to perform the cipher operation * * Decrypt ciphertext data using the skcipher_request handle. That data * structure and how it is filled with data is discussed with the * skcipher_request_* functions. 
* * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_skcipher_decrypt(struct skcipher_request *req); /** * crypto_skcipher_export() - export partial state * @req: reference to the skcipher_request handle that holds all information * needed to perform the operation * @out: output buffer of sufficient size that can hold the state * * Export partial state of the transformation. This function dumps the * entire state of the ongoing transformation into a provided block of * data so it can be @import 'ed back later on. This is useful in case * you want to save partial result of the transformation after * processing certain amount of data and reload this partial result * multiple times later on for multiple re-use. No data processing * happens at this point. * * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_skcipher_export(struct skcipher_request *req, void *out); /** * crypto_skcipher_import() - import partial state * @req: reference to the skcipher_request handle that holds all information * needed to perform the operation * @in: buffer holding the state * * Import partial state of the transformation. This function loads the * entire state of the ongoing transformation from a provided block of * data so the transformation can continue from this point onward. No * data processing happens at this point. * * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_skcipher_import(struct skcipher_request *req, const void *in); /** * crypto_lskcipher_encrypt() - encrypt plaintext * @tfm: lskcipher handle * @src: source buffer * @dst: destination buffer * @len: number of bytes to process * @siv: IV + state for the cipher operation. The length of the IV must * comply with the IV size defined by crypto_lskcipher_ivsize. The * IV is then followed with a buffer with the length as specified by * crypto_lskcipher_statesize. * Encrypt plaintext data using the lskcipher handle. * * Return: >=0 if the cipher operation was successful, if positive * then this many bytes have been left unprocessed; * < 0 if an error occurred */ int crypto_lskcipher_encrypt(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *siv); /** * crypto_lskcipher_decrypt() - decrypt ciphertext * @tfm: lskcipher handle * @src: source buffer * @dst: destination buffer * @len: number of bytes to process * @siv: IV + state for the cipher operation. The length of the IV must * comply with the IV size defined by crypto_lskcipher_ivsize. The * IV is then followed with a buffer with the length as specified by * crypto_lskcipher_statesize. * * Decrypt ciphertext data using the lskcipher handle. * * Return: >=0 if the cipher operation was successful, if positive * then this many bytes have been left unprocessed; * < 0 if an error occurred */ int crypto_lskcipher_decrypt(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *siv); /** * DOC: Symmetric Key Cipher Request Handle * * The skcipher_request data structure contains all pointers to data * required for the symmetric key cipher operation. This includes the cipher * handle (which can be used by multiple skcipher_request instances), pointer * to plaintext and ciphertext, asynchronous callback function, etc. It acts * as a handle to the skcipher_request_* API calls in a similar way as * skcipher handle to the crypto_skcipher_* API calls. 
*/ /** * crypto_skcipher_reqsize() - obtain size of the request data structure * @tfm: cipher handle * * Return: number of bytes */ static inline unsigned int crypto_skcipher_reqsize(struct crypto_skcipher *tfm) { return tfm->reqsize; } /** * skcipher_request_set_tfm() - update cipher handle reference in request * @req: request handle to be modified * @tfm: cipher handle that shall be added to the request handle * * Allow the caller to replace the existing skcipher handle in the request * data structure with a different one. */ static inline void skcipher_request_set_tfm(struct skcipher_request *req, struct crypto_skcipher *tfm) { req->base.tfm = crypto_skcipher_tfm(tfm); } static inline void skcipher_request_set_sync_tfm(struct skcipher_request *req, struct crypto_sync_skcipher *tfm) { skcipher_request_set_tfm(req, &tfm->base); } static inline struct skcipher_request *skcipher_request_cast( struct crypto_async_request *req) { return container_of(req, struct skcipher_request, base); } /** * skcipher_request_alloc() - allocate request data structure * @tfm: cipher handle to be registered with the request * @gfp: memory allocation flag that is handed to kmalloc by the API call. * * Allocate the request data structure that must be used with the skcipher * encrypt and decrypt API calls. During the allocation, the provided skcipher * handle is registered in the request data structure. * * Return: allocated request handle in case of success, or NULL if out of memory */ static inline struct skcipher_request *skcipher_request_alloc_noprof( struct crypto_skcipher *tfm, gfp_t gfp) { struct skcipher_request *req; req = kmalloc_noprof(sizeof(struct skcipher_request) + crypto_skcipher_reqsize(tfm), gfp); if (likely(req)) skcipher_request_set_tfm(req, tfm); return req; } #define skcipher_request_alloc(...) alloc_hooks(skcipher_request_alloc_noprof(__VA_ARGS__)) /** * skcipher_request_free() - zeroize and free request data structure * @req: request data structure cipher handle to be freed */ static inline void skcipher_request_free(struct skcipher_request *req) { kfree_sensitive(req); } static inline void skcipher_request_zero(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); memzero_explicit(req, sizeof(*req) + crypto_skcipher_reqsize(tfm)); } /** * skcipher_request_set_callback() - set asynchronous callback function * @req: request handle * @flags: specify zero or an ORing of the flags * CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and * increase the wait queue beyond the initial maximum size; * CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep * @compl: callback function pointer to be registered with the request handle * @data: The data pointer refers to memory that is not used by the kernel * crypto API, but provided to the callback function for it to use. Here, * the caller can provide a reference to memory the callback function can * operate on. As the callback function is invoked asynchronously to the * related functionality, it may need to access data structures of the * related functionality which can be referenced using this pointer. The * callback function can access the memory via the "data" field in the * crypto_async_request data structure provided to the callback function. * * This function allows setting the callback function that is triggered once the * cipher operation completes. 
* * The callback function is registered with the skcipher_request handle and * must comply with the following template:: * * void callback_function(struct crypto_async_request *req, int error) */ static inline void skcipher_request_set_callback(struct skcipher_request *req, u32 flags, crypto_completion_t compl, void *data) { req->base.complete = compl; req->base.data = data; req->base.flags = flags; } /** * skcipher_request_set_crypt() - set data buffers * @req: request handle * @src: source scatter / gather list * @dst: destination scatter / gather list * @cryptlen: number of bytes to process from @src * @iv: IV for the cipher operation which must comply with the IV size defined * by crypto_skcipher_ivsize * * This function allows setting of the source data and destination data * scatter / gather lists. * * For encryption, the source is treated as the plaintext and the * destination is the ciphertext. For a decryption operation, the use is * reversed - the source is the ciphertext and the destination is the plaintext. */ static inline void skcipher_request_set_crypt( struct skcipher_request *req, struct scatterlist *src, struct scatterlist *dst, unsigned int cryptlen, void *iv) { req->src = src; req->dst = dst; req->cryptlen = cryptlen; req->iv = iv; } #endif /* _CRYPTO_SKCIPHER_H */
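/*
 * Illustrative sketch (not part of the original header): one-shot CBC(AES)
 * encryption through the synchronous skcipher interface declared above. The
 * function name, the "cbc(aes)" algorithm string and the in-place buffer
 * handling are assumptions made for the example; the calls themselves
 * (crypto_alloc_sync_skcipher(), crypto_sync_skcipher_setkey(),
 * SYNC_SKCIPHER_REQUEST_ON_STACK, skcipher_request_set_crypt(),
 * crypto_skcipher_encrypt()) are the ones documented in this header.
 */
#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/err.h>

static int example_encrypt_cbc_aes(u8 *buf, unsigned int len,
				   const u8 *key, unsigned int keylen, u8 *iv)
{
	struct crypto_sync_skcipher *tfm;
	struct scatterlist sg;
	int err;

	tfm = crypto_alloc_sync_skcipher("cbc(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_sync_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out;

	{
		SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);

		/* in-place operation: src == dst; for CBC, len must be a
		 * multiple of the cipher block size */
		sg_init_one(&sg, buf, len);
		skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
					      NULL, NULL);
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);

		err = crypto_skcipher_encrypt(req);
		skcipher_request_zero(req);
	}

out:
	crypto_free_sync_skcipher(tfm);
	return err;
}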
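/*
 * Illustrative sketch (not part of the original header): one-shot use of the
 * linear (lskcipher) interface declared above. The function name and the
 * "cbc(aes)" algorithm string are assumptions; the calls and the
 * ivsize + statesize layout of the @siv buffer follow the declarations and
 * kerneldoc in this header.
 */
static int example_lskcipher_encrypt(const u8 *src, u8 *dst, unsigned int len,
				     const u8 *key, unsigned int keylen,
				     const u8 *iv)
{
	struct crypto_lskcipher *tfm;
	unsigned int ivsize, statesize;
	u8 *siv;
	int err;

	tfm = crypto_alloc_lskcipher("cbc(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_lskcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	/* the siv buffer carries the IV followed by the algorithm state */
	ivsize = crypto_lskcipher_ivsize(tfm);
	statesize = crypto_lskcipher_statesize(tfm);
	siv = kzalloc(ivsize + statesize, GFP_KERNEL);
	if (!siv) {
		err = -ENOMEM;
		goto out_free_tfm;
	}
	memcpy(siv, iv, ivsize);

	/* a positive return value would mean bytes were left unprocessed */
	err = crypto_lskcipher_encrypt(tfm, src, dst, len, siv);

	kfree_sensitive(siv);
out_free_tfm:
	crypto_free_lskcipher(tfm);
	return err;
}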
// SPDX-License-Identifier: GPL-2.0-only /* * CAN driver for PEAK System USB adapters * Derived from the PCAN project file driver/src/pcan_usb_core.c * * Copyright (C) 2003-2010 PEAK System-Technik GmbH * Copyright (C) 2010-2012 Stephane Grosjean <s.grosjean@peak-system.com> * * Many thanks to Klaus Hitschler <klaus.hitschler@gmx.de> */ #include <linux/device.h> #include <linux/ethtool.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/signal.h> #include <linux/slab.h> #include <linux/sysfs.h> #include <linux/usb.h> #include <linux/can.h> #include <linux/can/dev.h> #include <linux/can/error.h> #include "pcan_usb_core.h" MODULE_AUTHOR("Stephane Grosjean <s.grosjean@peak-system.com>"); MODULE_DESCRIPTION("CAN driver for PEAK-System USB adapters"); MODULE_LICENSE("GPL v2"); /* Table of devices that work with this driver */ static const struct usb_device_id peak_usb_table[] = { { USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USB_PRODUCT_ID), .driver_info = (kernel_ulong_t)&pcan_usb, }, { USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPRO_PRODUCT_ID), .driver_info = (kernel_ulong_t)&pcan_usb_pro, }, { USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBFD_PRODUCT_ID), .driver_info = (kernel_ulong_t)&pcan_usb_fd, }, { USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPROFD_PRODUCT_ID), .driver_info = (kernel_ulong_t)&pcan_usb_pro_fd, }, { USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBCHIP_PRODUCT_ID), .driver_info = (kernel_ulong_t)&pcan_usb_chip, }, { USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBX6_PRODUCT_ID), .driver_info = (kernel_ulong_t)&pcan_usb_x6, }, { /* Terminating entry */ } }; MODULE_DEVICE_TABLE(usb, peak_usb_table); static ssize_t can_channel_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); struct peak_usb_device *peak_dev = netdev_priv(netdev); return sysfs_emit(buf, "%08X\n", peak_dev->can_channel_id); } static DEVICE_ATTR_RO(can_channel_id); /* mutable to avoid cast in attribute_group */ static struct attribute *peak_usb_sysfs_attrs[] = { &dev_attr_can_channel_id.attr, NULL, }; static const struct attribute_group peak_usb_sysfs_group = { .name = "peak_usb", .attrs = peak_usb_sysfs_attrs, }; /* * dump memory */ #define DUMP_WIDTH 16 void pcan_dump_mem(const char *prompt, const void *p, int l) { pr_info("%s dumping %s (%d bytes):\n", PCAN_USB_DRIVER_NAME, prompt ? 
prompt : "memory", l); print_hex_dump(KERN_INFO, PCAN_USB_DRIVER_NAME " ", DUMP_PREFIX_NONE, DUMP_WIDTH, 1, p, l, false); } /* * initialize a time_ref object with usb adapter own settings */ void peak_usb_init_time_ref(struct peak_time_ref *time_ref, const struct peak_usb_adapter *adapter) { if (time_ref) { memset(time_ref, 0, sizeof(struct peak_time_ref)); time_ref->adapter = adapter; } } /* * sometimes, another now may be more recent than current one... */ void peak_usb_update_ts_now(struct peak_time_ref *time_ref, u32 ts_now) { time_ref->ts_dev_2 = ts_now; /* should wait at least two passes before computing */ if (ktime_to_ns(time_ref->tv_host) > 0) { u32 delta_ts = time_ref->ts_dev_2 - time_ref->ts_dev_1; if (time_ref->ts_dev_2 < time_ref->ts_dev_1) delta_ts &= (1 << time_ref->adapter->ts_used_bits) - 1; time_ref->ts_total += delta_ts; } } /* * register device timestamp as now */ void peak_usb_set_ts_now(struct peak_time_ref *time_ref, u32 ts_now) { if (ktime_to_ns(time_ref->tv_host_0) == 0) { /* use monotonic clock to correctly compute further deltas */ time_ref->tv_host_0 = ktime_get(); time_ref->tv_host = ktime_set(0, 0); } else { /* * delta_us should not be >= 2^32 => delta should be < 4294s * handle 32-bits wrapping here: if count of s. reaches 4200, * reset counters and change time base */ if (ktime_to_ns(time_ref->tv_host)) { ktime_t delta = ktime_sub(time_ref->tv_host, time_ref->tv_host_0); if (ktime_to_ns(delta) > (4200ull * NSEC_PER_SEC)) { time_ref->tv_host_0 = time_ref->tv_host; time_ref->ts_total = 0; } } time_ref->tv_host = ktime_get(); time_ref->tick_count++; } time_ref->ts_dev_1 = time_ref->ts_dev_2; peak_usb_update_ts_now(time_ref, ts_now); } /* * compute time according to current ts and time_ref data */ void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *time) { /* protect from getting time before setting now */ if (ktime_to_ns(time_ref->tv_host)) { u64 delta_us; s64 delta_ts = 0; /* General case: dev_ts_1 < dev_ts_2 < ts, with: * * - dev_ts_1 = previous sync timestamp * - dev_ts_2 = last sync timestamp * - ts = event timestamp * - ts_period = known sync period (theoretical) * ~ dev_ts2 - dev_ts1 * *but*: * * - time counters wrap (see adapter->ts_used_bits) * - sometimes, dev_ts_1 < ts < dev_ts2 * * "normal" case (sync time counters increase): * must take into account case when ts wraps (tsw) * * < ts_period > < > * | | | * ---+--------+----+-------0-+--+--> * ts_dev_1 | ts_dev_2 | * ts tsw */ if (time_ref->ts_dev_1 < time_ref->ts_dev_2) { /* case when event time (tsw) wraps */ if (ts < time_ref->ts_dev_1) delta_ts = BIT_ULL(time_ref->adapter->ts_used_bits); /* Otherwise, sync time counter (ts_dev_2) has wrapped: * handle case when event time (tsn) hasn't. 
* * < ts_period > < > * | | | * ---+--------+--0-+---------+--+--> * ts_dev_1 | ts_dev_2 | * tsn ts */ } else if (time_ref->ts_dev_1 < ts) { delta_ts = -BIT_ULL(time_ref->adapter->ts_used_bits); } /* add delay between last sync and event timestamps */ delta_ts += (signed int)(ts - time_ref->ts_dev_2); /* add time from beginning to last sync */ delta_ts += time_ref->ts_total; /* convert ticks number into microseconds */ delta_us = delta_ts * time_ref->adapter->us_per_ts_scale; delta_us >>= time_ref->adapter->us_per_ts_shift; *time = ktime_add_us(time_ref->tv_host_0, delta_us); } else { *time = ktime_get(); } } /* post received skb with native 64-bit hw timestamp */ int peak_usb_netif_rx_64(struct sk_buff *skb, u32 ts_low, u32 ts_high) { struct skb_shared_hwtstamps *hwts = skb_hwtstamps(skb); u64 ns_ts; ns_ts = (u64)ts_high << 32 | ts_low; ns_ts *= NSEC_PER_USEC; hwts->hwtstamp = ns_to_ktime(ns_ts); return netif_rx(skb); } /* * callback for bulk Rx urb */ static void peak_usb_read_bulk_callback(struct urb *urb) { struct peak_usb_device *dev = urb->context; struct net_device *netdev; int err; netdev = dev->netdev; if (!netif_device_present(netdev)) return; /* check reception status */ switch (urb->status) { case 0: /* success */ break; case -EILSEQ: case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: return; default: if (net_ratelimit()) netdev_err(netdev, "Rx urb aborted (%d)\n", urb->status); goto resubmit_urb; } /* protect from any incoming empty msgs */ if ((urb->actual_length > 0) && (dev->adapter->dev_decode_buf)) { /* handle these kinds of msgs only if _start callback called */ if (dev->state & PCAN_USB_STATE_STARTED) { err = dev->adapter->dev_decode_buf(dev, urb); if (err) pcan_dump_mem("received usb message", urb->transfer_buffer, urb->transfer_buffer_length); } } resubmit_urb: usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, dev->ep_msg_in), urb->transfer_buffer, dev->adapter->rx_buffer_size, peak_usb_read_bulk_callback, dev); usb_anchor_urb(urb, &dev->rx_submitted); err = usb_submit_urb(urb, GFP_ATOMIC); if (!err) return; usb_unanchor_urb(urb); if (err == -ENODEV) netif_device_detach(netdev); else netdev_err(netdev, "failed resubmitting read bulk urb: %d\n", err); } /* * callback for bulk Tx urb */ static void peak_usb_write_bulk_callback(struct urb *urb) { struct peak_tx_urb_context *context = urb->context; struct peak_usb_device *dev; struct net_device *netdev; int tx_bytes; BUG_ON(!context); dev = context->dev; netdev = dev->netdev; atomic_dec(&dev->active_tx_urbs); if (!netif_device_present(netdev)) return; /* check tx status */ switch (urb->status) { case 0: /* prevent tx timeout */ netif_trans_update(netdev); break; case -EPROTO: case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: break; default: if (net_ratelimit()) netdev_err(netdev, "Tx urb aborted (%d)\n", urb->status); break; } /* should always release echo skb and corresponding context */ tx_bytes = can_get_echo_skb(netdev, context->echo_index, NULL); context->echo_index = PCAN_USB_MAX_TX_URBS; if (!urb->status) { /* transmission complete */ netdev->stats.tx_packets++; netdev->stats.tx_bytes += tx_bytes; /* do wakeup tx queue in case of success only */ netif_wake_queue(netdev); } } /* * called by netdev to send one skb on the CAN interface. 
*/ static netdev_tx_t peak_usb_ndo_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct peak_usb_device *dev = netdev_priv(netdev); struct peak_tx_urb_context *context = NULL; struct net_device_stats *stats = &netdev->stats; struct urb *urb; u8 *obuf; int i, err; size_t size = dev->adapter->tx_buffer_size; if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; for (i = 0; i < PCAN_USB_MAX_TX_URBS; i++) if (dev->tx_contexts[i].echo_index == PCAN_USB_MAX_TX_URBS) { context = dev->tx_contexts + i; break; } if (!context) { /* should not occur except during restart */ return NETDEV_TX_BUSY; } urb = context->urb; obuf = urb->transfer_buffer; err = dev->adapter->dev_encode_msg(dev, skb, obuf, &size); if (err) { if (net_ratelimit()) netdev_err(netdev, "packet dropped\n"); dev_kfree_skb(skb); stats->tx_dropped++; return NETDEV_TX_OK; } context->echo_index = i; usb_anchor_urb(urb, &dev->tx_submitted); can_put_echo_skb(skb, netdev, context->echo_index, 0); atomic_inc(&dev->active_tx_urbs); err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { can_free_echo_skb(netdev, context->echo_index, NULL); usb_unanchor_urb(urb); /* this context is not used in fact */ context->echo_index = PCAN_USB_MAX_TX_URBS; atomic_dec(&dev->active_tx_urbs); switch (err) { case -ENODEV: netif_device_detach(netdev); break; default: netdev_warn(netdev, "tx urb submitting failed err=%d\n", err); fallthrough; case -ENOENT: /* cable unplugged */ stats->tx_dropped++; } } else { netif_trans_update(netdev); /* slow down tx path */ if (atomic_read(&dev->active_tx_urbs) >= PCAN_USB_MAX_TX_URBS) netif_stop_queue(netdev); } return NETDEV_TX_OK; } /* * start the CAN interface. * Rx and Tx urbs are allocated here. Rx urbs are submitted here. */ static int peak_usb_start(struct peak_usb_device *dev) { struct net_device *netdev = dev->netdev; int err, i; for (i = 0; i < PCAN_USB_MAX_RX_URBS; i++) { struct urb *urb; u8 *buf; /* create a URB, and a buffer for it, to receive usb messages */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { err = -ENOMEM; break; } buf = kmalloc(dev->adapter->rx_buffer_size, GFP_KERNEL); if (!buf) { usb_free_urb(urb); err = -ENOMEM; break; } usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, dev->ep_msg_in), buf, dev->adapter->rx_buffer_size, peak_usb_read_bulk_callback, dev); /* ask last usb_free_urb() to also kfree() transfer_buffer */ urb->transfer_flags |= URB_FREE_BUFFER; usb_anchor_urb(urb, &dev->rx_submitted); err = usb_submit_urb(urb, GFP_KERNEL); if (err) { if (err == -ENODEV) netif_device_detach(dev->netdev); usb_unanchor_urb(urb); kfree(buf); usb_free_urb(urb); break; } /* drop reference, USB core will take care of freeing it */ usb_free_urb(urb); } /* did we submit any URBs? 
Warn if we were not able to submit all urbs */ if (i < PCAN_USB_MAX_RX_URBS) { if (i == 0) { netdev_err(netdev, "couldn't setup any rx URB\n"); return err; } netdev_warn(netdev, "rx performance may be slow\n"); } /* pre-alloc tx buffers and corresponding urbs */ for (i = 0; i < PCAN_USB_MAX_TX_URBS; i++) { struct peak_tx_urb_context *context; struct urb *urb; u8 *buf; /* create a URB and a buffer for it, to transmit usb messages */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { err = -ENOMEM; break; } buf = kmalloc(dev->adapter->tx_buffer_size, GFP_KERNEL); if (!buf) { usb_free_urb(urb); err = -ENOMEM; break; } context = dev->tx_contexts + i; context->dev = dev; context->urb = urb; usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, dev->ep_msg_out), buf, dev->adapter->tx_buffer_size, peak_usb_write_bulk_callback, context); /* ask last usb_free_urb() to also kfree() transfer_buffer */ urb->transfer_flags |= URB_FREE_BUFFER; } /* warn if we were not able to allocate enough tx contexts */ if (i < PCAN_USB_MAX_TX_URBS) { if (i == 0) { netdev_err(netdev, "couldn't setup any tx URB\n"); goto err_tx; } netdev_warn(netdev, "tx performance may be slow\n"); } if (dev->adapter->dev_start) { err = dev->adapter->dev_start(dev); if (err) goto err_adapter; } dev->state |= PCAN_USB_STATE_STARTED; /* can set bus on now */ if (dev->adapter->dev_set_bus) { err = dev->adapter->dev_set_bus(dev, 1); if (err) goto err_adapter; } dev->can.state = CAN_STATE_ERROR_ACTIVE; return 0; err_adapter: if (err == -ENODEV) netif_device_detach(dev->netdev); netdev_warn(netdev, "couldn't submit control: %d\n", err); for (i = 0; i < PCAN_USB_MAX_TX_URBS; i++) { usb_free_urb(dev->tx_contexts[i].urb); dev->tx_contexts[i].urb = NULL; } err_tx: usb_kill_anchored_urbs(&dev->rx_submitted); return err; } /* * called by netdev to open the corresponding CAN interface. */ static int peak_usb_ndo_open(struct net_device *netdev) { struct peak_usb_device *dev = netdev_priv(netdev); int err; /* common open */ err = open_candev(netdev); if (err) return err; /* finally start device */ err = peak_usb_start(dev); if (err) { netdev_err(netdev, "couldn't start device: %d\n", err); close_candev(netdev); return err; } netif_start_queue(netdev); return 0; } /* * unlink in-flight Rx and Tx urbs and free their memory. */ static void peak_usb_unlink_all_urbs(struct peak_usb_device *dev) { int i; /* free all Rx (submitted) urbs */ usb_kill_anchored_urbs(&dev->rx_submitted); /* free unsubmitted Tx urbs first */ for (i = 0; i < PCAN_USB_MAX_TX_URBS; i++) { struct urb *urb = dev->tx_contexts[i].urb; if (!urb || dev->tx_contexts[i].echo_index != PCAN_USB_MAX_TX_URBS) { /* * this urb is already released or still submitted, * let usb core free by itself */ continue; } usb_free_urb(urb); dev->tx_contexts[i].urb = NULL; } /* then free all submitted Tx urbs */ usb_kill_anchored_urbs(&dev->tx_submitted); atomic_set(&dev->active_tx_urbs, 0); } /* * called by netdev to close the corresponding CAN interface. 
*/ static int peak_usb_ndo_stop(struct net_device *netdev) { struct peak_usb_device *dev = netdev_priv(netdev); dev->state &= ~PCAN_USB_STATE_STARTED; netif_stop_queue(netdev); close_candev(netdev); dev->can.state = CAN_STATE_STOPPED; /* unlink all pending urbs and free used memory */ peak_usb_unlink_all_urbs(dev); if (dev->adapter->dev_stop) dev->adapter->dev_stop(dev); /* can set bus off now */ if (dev->adapter->dev_set_bus) { int err = dev->adapter->dev_set_bus(dev, 0); if (err) return err; } return 0; } /* * handle end of waiting for the device to reset */ void peak_usb_restart_complete(struct peak_usb_device *dev) { /* finally MUST update can state */ dev->can.state = CAN_STATE_ERROR_ACTIVE; /* netdev queue can be awaken now */ netif_wake_queue(dev->netdev); } void peak_usb_async_complete(struct urb *urb) { kfree(urb->transfer_buffer); usb_free_urb(urb); } /* * device (auto-)restart mechanism runs in a timer context => * MUST handle restart with asynchronous usb transfers */ static int peak_usb_restart(struct peak_usb_device *dev) { struct urb *urb; int err; u8 *buf; /* * if device doesn't define any asynchronous restart handler, simply * wake the netdev queue up */ if (!dev->adapter->dev_restart_async) { peak_usb_restart_complete(dev); return 0; } /* first allocate a urb to handle the asynchronous steps */ urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) return -ENOMEM; /* also allocate enough space for the commands to send */ buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_ATOMIC); if (!buf) { usb_free_urb(urb); return -ENOMEM; } /* call the device specific handler for the restart */ err = dev->adapter->dev_restart_async(dev, urb, buf); if (!err) return 0; kfree(buf); usb_free_urb(urb); return err; } /* * candev callback used to change CAN mode. * Warning: this is called from a timer context! */ static int peak_usb_set_mode(struct net_device *netdev, enum can_mode mode) { struct peak_usb_device *dev = netdev_priv(netdev); int err = 0; switch (mode) { case CAN_MODE_START: err = peak_usb_restart(dev); if (err) netdev_err(netdev, "couldn't start device (err %d)\n", err); break; default: return -EOPNOTSUPP; } return err; } /* * candev callback used to set device nominal/arbitration bitrate. */ static int peak_usb_set_bittiming(struct net_device *netdev) { struct peak_usb_device *dev = netdev_priv(netdev); const struct peak_usb_adapter *pa = dev->adapter; if (pa->dev_set_bittiming) { struct can_bittiming *bt = &dev->can.bittiming; int err = pa->dev_set_bittiming(dev, bt); if (err) netdev_info(netdev, "couldn't set bitrate (err %d)\n", err); return err; } return 0; } /* * candev callback used to set device data bitrate. 
*/ static int peak_usb_set_data_bittiming(struct net_device *netdev) { struct peak_usb_device *dev = netdev_priv(netdev); const struct peak_usb_adapter *pa = dev->adapter; if (pa->dev_set_data_bittiming) { struct can_bittiming *bt = &dev->can.fd.data_bittiming; int err = pa->dev_set_data_bittiming(dev, bt); if (err) netdev_info(netdev, "couldn't set data bitrate (err %d)\n", err); return err; } return 0; } static int peak_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { struct hwtstamp_config hwts_cfg = { 0 }; switch (cmd) { case SIOCSHWTSTAMP: /* set */ if (copy_from_user(&hwts_cfg, ifr->ifr_data, sizeof(hwts_cfg))) return -EFAULT; if (hwts_cfg.tx_type == HWTSTAMP_TX_OFF && hwts_cfg.rx_filter == HWTSTAMP_FILTER_ALL) return 0; return -ERANGE; case SIOCGHWTSTAMP: /* get */ hwts_cfg.tx_type = HWTSTAMP_TX_OFF; hwts_cfg.rx_filter = HWTSTAMP_FILTER_ALL; if (copy_to_user(ifr->ifr_data, &hwts_cfg, sizeof(hwts_cfg))) return -EFAULT; return 0; default: return -EOPNOTSUPP; } } static const struct net_device_ops peak_usb_netdev_ops = { .ndo_open = peak_usb_ndo_open, .ndo_stop = peak_usb_ndo_stop, .ndo_eth_ioctl = peak_eth_ioctl, .ndo_start_xmit = peak_usb_ndo_start_xmit, .ndo_change_mtu = can_change_mtu, }; /* CAN-USB devices generally handle 32-bit CAN channel IDs. * In case one doesn't, then it have to overload this function. */ int peak_usb_get_eeprom_len(struct net_device *netdev) { return sizeof(u32); } /* Every CAN-USB device exports the dev_get_can_channel_id() operation. It is used * here to fill the data buffer with the user defined CAN channel ID. */ int peak_usb_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, u8 *data) { struct peak_usb_device *dev = netdev_priv(netdev); u32 ch_id; __le32 ch_id_le; int err; err = dev->adapter->dev_get_can_channel_id(dev, &ch_id); if (err) return err; /* ethtool operates on individual bytes. The byte order of the CAN * channel id in memory depends on the kernel architecture. We * convert the CAN channel id back to the native byte order of the PEAK * device itself to ensure that the order is consistent for all * host architectures. */ ch_id_le = cpu_to_le32(ch_id); memcpy(data, (u8 *)&ch_id_le + eeprom->offset, eeprom->len); /* update cached value */ dev->can_channel_id = ch_id; return err; } /* Every CAN-USB device exports the dev_get_can_channel_id()/dev_set_can_channel_id() * operations. They are used here to set the new user defined CAN channel ID. */ int peak_usb_set_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, u8 *data) { struct peak_usb_device *dev = netdev_priv(netdev); u32 ch_id; __le32 ch_id_le; int err; /* first, read the current user defined CAN channel ID */ err = dev->adapter->dev_get_can_channel_id(dev, &ch_id); if (err) { netdev_err(netdev, "Failed to init CAN channel id (err %d)\n", err); return err; } /* do update the value with user given bytes. * ethtool operates on individual bytes. The byte order of the CAN * channel ID in memory depends on the kernel architecture. We * convert the CAN channel ID back to the native byte order of the PEAK * device itself to ensure that the order is consistent for all * host architectures. 
*/ ch_id_le = cpu_to_le32(ch_id); memcpy((u8 *)&ch_id_le + eeprom->offset, data, eeprom->len); ch_id = le32_to_cpu(ch_id_le); /* flash the new value now */ err = dev->adapter->dev_set_can_channel_id(dev, ch_id); if (err) { netdev_err(netdev, "Failed to write new CAN channel id (err %d)\n", err); return err; } /* update cached value with the new one */ dev->can_channel_id = ch_id; return 0; } int pcan_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; info->tx_types = BIT(HWTSTAMP_TX_OFF); info->rx_filters = BIT(HWTSTAMP_FILTER_ALL); return 0; } /* * create one device which is attached to CAN controller #ctrl_idx of the * usb adapter. */ static int peak_usb_create_dev(const struct peak_usb_adapter *peak_usb_adapter, struct usb_interface *intf, int ctrl_idx) { struct usb_device *usb_dev = interface_to_usbdev(intf); int sizeof_candev = peak_usb_adapter->sizeof_dev_private; struct peak_usb_device *dev; struct net_device *netdev; int i, err; u16 tmp16; if (sizeof_candev < sizeof(struct peak_usb_device)) sizeof_candev = sizeof(struct peak_usb_device); netdev = alloc_candev(sizeof_candev, PCAN_USB_MAX_TX_URBS); if (!netdev) { dev_err(&intf->dev, "%s: couldn't alloc candev\n", PCAN_USB_DRIVER_NAME); return -ENOMEM; } dev = netdev_priv(netdev); /* allocate a buffer large enough to send commands */ dev->cmd_buf = kzalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL); if (!dev->cmd_buf) { err = -ENOMEM; goto lbl_free_candev; } dev->udev = usb_dev; dev->netdev = netdev; dev->adapter = peak_usb_adapter; dev->ctrl_idx = ctrl_idx; dev->state = PCAN_USB_STATE_CONNECTED; dev->ep_msg_in = peak_usb_adapter->ep_msg_in; dev->ep_msg_out = peak_usb_adapter->ep_msg_out[ctrl_idx]; dev->can.clock = peak_usb_adapter->clock; dev->can.bittiming_const = peak_usb_adapter->bittiming_const; dev->can.do_set_bittiming = peak_usb_set_bittiming; dev->can.fd.data_bittiming_const = peak_usb_adapter->data_bittiming_const; dev->can.fd.do_set_data_bittiming = peak_usb_set_data_bittiming; dev->can.do_set_mode = peak_usb_set_mode; dev->can.do_get_berr_counter = peak_usb_adapter->do_get_berr_counter; dev->can.ctrlmode_supported = peak_usb_adapter->ctrlmode_supported; netdev->netdev_ops = &peak_usb_netdev_ops; netdev->flags |= IFF_ECHO; /* we support local echo */ /* add ethtool support */ netdev->ethtool_ops = peak_usb_adapter->ethtool_ops; /* register peak_usb sysfs files */ netdev->sysfs_groups[0] = &peak_usb_sysfs_group; init_usb_anchor(&dev->rx_submitted); init_usb_anchor(&dev->tx_submitted); atomic_set(&dev->active_tx_urbs, 0); for (i = 0; i < PCAN_USB_MAX_TX_URBS; i++) dev->tx_contexts[i].echo_index = PCAN_USB_MAX_TX_URBS; dev->prev_siblings = usb_get_intfdata(intf); usb_set_intfdata(intf, dev); SET_NETDEV_DEV(netdev, &intf->dev); netdev->dev_id = ctrl_idx; err = register_candev(netdev); if (err) { dev_err(&intf->dev, "couldn't register CAN device: %d\n", err); goto lbl_restore_intf_data; } if (dev->prev_siblings) (dev->prev_siblings)->next_siblings = dev; /* keep hw revision into the netdevice */ tmp16 = le16_to_cpu(usb_dev->descriptor.bcdDevice); dev->device_rev = tmp16 >> 8; if (dev->adapter->dev_init) { err = dev->adapter->dev_init(dev); if (err) goto lbl_unregister_candev; } /* set bus off */ if (dev->adapter->dev_set_bus) { err = dev->adapter->dev_set_bus(dev, 0); if (err) goto adap_dev_free; } /* get CAN channel id early */ dev->adapter->dev_get_can_channel_id(dev, &dev->can_channel_id); 
netdev_info(netdev, "attached to %s channel %u (device 0x%08X)\n", peak_usb_adapter->name, ctrl_idx, dev->can_channel_id); return 0; adap_dev_free: if (dev->adapter->dev_free) dev->adapter->dev_free(dev); lbl_unregister_candev: unregister_candev(netdev); lbl_restore_intf_data: usb_set_intfdata(intf, dev->prev_siblings); kfree(dev->cmd_buf); lbl_free_candev: free_candev(netdev); return err; } /* * called by the usb core when the device is unplugged from the system */ static void peak_usb_disconnect(struct usb_interface *intf) { struct peak_usb_device *dev; struct peak_usb_device *dev_prev_siblings; /* unregister as many netdev devices as siblings */ for (dev = usb_get_intfdata(intf); dev; dev = dev_prev_siblings) { struct net_device *netdev = dev->netdev; char name[IFNAMSIZ]; dev_prev_siblings = dev->prev_siblings; dev->state &= ~PCAN_USB_STATE_CONNECTED; strscpy(name, netdev->name, IFNAMSIZ); unregister_candev(netdev); kfree(dev->cmd_buf); dev->next_siblings = NULL; if (dev->adapter->dev_free) dev->adapter->dev_free(dev); free_candev(netdev); dev_info(&intf->dev, "%s removed\n", name); } usb_set_intfdata(intf, NULL); } /* * probe function for new PEAK-System devices */ static int peak_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { const struct peak_usb_adapter *peak_usb_adapter; int i, err = -ENOMEM; /* get corresponding PCAN-USB adapter */ peak_usb_adapter = (const struct peak_usb_adapter *)id->driver_info; /* got corresponding adapter: check if it handles current interface */ if (peak_usb_adapter->intf_probe) { err = peak_usb_adapter->intf_probe(intf); if (err) return err; } for (i = 0; i < peak_usb_adapter->ctrl_count; i++) { err = peak_usb_create_dev(peak_usb_adapter, intf, i); if (err) { /* deregister already created devices */ peak_usb_disconnect(intf); break; } } return err; } /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver peak_usb_driver = { .name = PCAN_USB_DRIVER_NAME, .disconnect = peak_usb_disconnect, .probe = peak_usb_probe, .id_table = peak_usb_table, }; static int __init peak_usb_init(void) { int err; /* register this driver with the USB subsystem */ err = usb_register(&peak_usb_driver); if (err) pr_err("%s: usb_register failed (err %d)\n", PCAN_USB_DRIVER_NAME, err); return err; } static int peak_usb_do_device_exit(struct device *d, void *arg) { struct usb_interface *intf = to_usb_interface(d); struct peak_usb_device *dev; /* stop as many netdev devices as siblings */ for (dev = usb_get_intfdata(intf); dev; dev = dev->prev_siblings) { struct net_device *netdev = dev->netdev; if (netif_device_present(netdev)) if (dev->adapter->dev_exit) dev->adapter->dev_exit(dev); } return 0; } static void __exit peak_usb_exit(void) { int err; /* last chance do send any synchronous commands here */ err = driver_for_each_device(&peak_usb_driver.driver, NULL, NULL, peak_usb_do_device_exit); if (err) pr_err("%s: failed to stop all can devices (err %d)\n", PCAN_USB_DRIVER_NAME, err); /* deregister this driver with the USB subsystem */ usb_deregister(&peak_usb_driver); pr_info("%s: PCAN-USB interfaces driver unloaded\n", PCAN_USB_DRIVER_NAME); } module_init(peak_usb_init); module_exit(peak_usb_exit);
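The SIOCSHWTSTAMP/SIOCGHWTSTAMP handling in peak_eth_ioctl() above only accepts and reports tx_type == HWTSTAMP_TX_OFF with rx_filter == HWTSTAMP_FILTER_ALL, i.e. hardware timestamps on reception only. Below is a minimal userspace sketch of querying that configuration; the interface name "can0" and the use of an AF_INET datagram socket for the ioctl are assumptions made for the example, not part of the driver.

/* Illustrative userspace sketch (not driver code): query the hardware
 * timestamping configuration that peak_eth_ioctl() reports for SIOCGHWTSTAMP.
 * The interface name "can0" is an assumption for this example. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

int main(void)
{
	struct hwtstamp_config cfg = { 0 };
	struct ifreq ifr = { 0 };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	strncpy(ifr.ifr_name, "can0", IFNAMSIZ - 1);
	ifr.ifr_data = (char *)&cfg;

	/* the driver answers HWTSTAMP_TX_OFF / HWTSTAMP_FILTER_ALL */
	if (ioctl(fd, SIOCGHWTSTAMP, &ifr) == 0)
		printf("tx_type=%d rx_filter=%d\n", cfg.tx_type, cfg.rx_filter);

	close(fd);
	return 0;
}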
1 1 1 3 2 2 2 2 3 3 4 3 10 4 6 3 2 1 1 1 2 3 3 3 3 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 // SPDX-License-Identifier: GPL-2.0-only /* * keyspan_remote: USB driver for the Keyspan DMR * * Copyright (C) 2005 Zymeta Corporation - Michael Downey (downey@zymeta.com) * * This driver has been put together with the support of Innosys, Inc. * and Keyspan, Inc the manufacturers of the Keyspan USB DMR product. */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/usb/input.h> /* Parameters that can be passed to the driver. */ static int debug; module_param(debug, int, 0444); MODULE_PARM_DESC(debug, "Enable extra debug messages and information"); /* Vendor and product ids */ #define USB_KEYSPAN_VENDOR_ID 0x06CD #define USB_KEYSPAN_PRODUCT_UIA11 0x0202 /* Defines for converting the data from the remote. */ #define ZERO 0x18 #define ZERO_MASK 0x1F /* 5 bits for a 0 */ #define ONE 0x3C #define ONE_MASK 0x3F /* 6 bits for a 1 */ #define SYNC 0x3F80 #define SYNC_MASK 0x3FFF /* 14 bits for a SYNC sequence */ #define STOP 0x00 #define STOP_MASK 0x1F /* 5 bits for the STOP sequence */ #define GAP 0xFF #define RECV_SIZE 8 /* The UIA-11 type have a 8 byte limit. */ /* * Table that maps the 31 possible keycodes to input keys. 
* Currently there are 15 and 17 button models so RESERVED codes * are blank areas in the mapping. */ static const unsigned short keyspan_key_table[] = { KEY_RESERVED, /* 0 is just a place holder. */ KEY_RESERVED, KEY_STOP, KEY_PLAYCD, KEY_RESERVED, KEY_PREVIOUSSONG, KEY_REWIND, KEY_FORWARD, KEY_NEXTSONG, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_PAUSE, KEY_VOLUMEUP, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_VOLUMEDOWN, KEY_RESERVED, KEY_UP, KEY_RESERVED, KEY_MUTE, KEY_LEFT, KEY_ENTER, KEY_RIGHT, KEY_RESERVED, KEY_RESERVED, KEY_DOWN, KEY_RESERVED, KEY_KPASTERISK, KEY_RESERVED, KEY_MENU }; /* table of devices that work with this driver */ static const struct usb_device_id keyspan_table[] = { { USB_DEVICE(USB_KEYSPAN_VENDOR_ID, USB_KEYSPAN_PRODUCT_UIA11) }, { } /* Terminating entry */ }; /* Structure to store all the real stuff that a remote sends to us. */ struct keyspan_message { u16 system; u8 button; u8 toggle; }; /* Structure used for all the bit testing magic needed to be done. */ struct bit_tester { u32 tester; int len; int pos; int bits_left; u8 buffer[32]; }; /* Structure to hold all of our driver specific stuff */ struct usb_keyspan { char name[128]; char phys[64]; unsigned short keymap[ARRAY_SIZE(keyspan_key_table)]; struct usb_device *udev; struct input_dev *input; struct usb_interface *interface; struct usb_endpoint_descriptor *in_endpoint; struct urb* irq_urb; int open; dma_addr_t in_dma; unsigned char *in_buffer; /* variables used to parse messages from remote. */ struct bit_tester data; int stage; int toggle; }; static struct usb_driver keyspan_driver; /* * Debug routine that prints out what we've received from the remote. */ static void keyspan_print(struct usb_keyspan* dev) /*unsigned char* data)*/ { char codes[4 * RECV_SIZE]; int i; for (i = 0; i < RECV_SIZE; i++) snprintf(codes + i * 3, 4, "%02x ", dev->in_buffer[i]); dev_info(&dev->udev->dev, "%s\n", codes); } /* * Routine that manages the bit_tester structure. It makes sure that there are * at least bits_needed bits loaded into the tester. */ static int keyspan_load_tester(struct usb_keyspan* dev, int bits_needed) { if (dev->data.bits_left >= bits_needed) return 0; /* * Somehow we've missed the last message. The message will be repeated * though so it's not too big a deal */ if (dev->data.pos >= dev->data.len) { dev_dbg(&dev->interface->dev, "%s - Error ran out of data. pos: %d, len: %d\n", __func__, dev->data.pos, dev->data.len); return -1; } /* Load as much as we can into the tester. */ while ((dev->data.bits_left + 7 < (sizeof(dev->data.tester) * 8)) && (dev->data.pos < dev->data.len)) { dev->data.tester += (dev->data.buffer[dev->data.pos++] << dev->data.bits_left); dev->data.bits_left += 8; } return 0; } static void keyspan_report_button(struct usb_keyspan *remote, int button, int press) { struct input_dev *input = remote->input; input_event(input, EV_MSC, MSC_SCAN, button); input_report_key(input, remote->keymap[button], press); input_sync(input); } /* * Routine that handles all the logic needed to parse out the message from the remote. */ static void keyspan_check_data(struct usb_keyspan *remote) { int i; int found = 0; struct keyspan_message message; switch(remote->stage) { case 0: /* * In stage 0 we want to find the start of a message. The remote sends a 0xFF as filler. * So the first byte that isn't a FF should be the start of a new message. 
*/ for (i = 0; i < RECV_SIZE && remote->in_buffer[i] == GAP; ++i); if (i < RECV_SIZE) { memcpy(remote->data.buffer, remote->in_buffer, RECV_SIZE); remote->data.len = RECV_SIZE; remote->data.pos = 0; remote->data.tester = 0; remote->data.bits_left = 0; remote->stage = 1; } break; case 1: /* * Stage 1 we should have 16 bytes and should be able to detect a * SYNC. The SYNC is 14 bits, 7 0's and then 7 1's. */ memcpy(remote->data.buffer + remote->data.len, remote->in_buffer, RECV_SIZE); remote->data.len += RECV_SIZE; found = 0; while ((remote->data.bits_left >= 14 || remote->data.pos < remote->data.len) && !found) { for (i = 0; i < 8; ++i) { if (keyspan_load_tester(remote, 14) != 0) { remote->stage = 0; return; } if ((remote->data.tester & SYNC_MASK) == SYNC) { remote->data.tester = remote->data.tester >> 14; remote->data.bits_left -= 14; found = 1; break; } else { remote->data.tester = remote->data.tester >> 1; --remote->data.bits_left; } } } if (!found) { remote->stage = 0; remote->data.len = 0; } else { remote->stage = 2; } break; case 2: /* * Stage 2 we should have 24 bytes which will be enough for a full * message. We need to parse out the system code, button code, * toggle code, and stop. */ memcpy(remote->data.buffer + remote->data.len, remote->in_buffer, RECV_SIZE); remote->data.len += RECV_SIZE; message.system = 0; for (i = 0; i < 9; i++) { keyspan_load_tester(remote, 6); if ((remote->data.tester & ZERO_MASK) == ZERO) { message.system = message.system << 1; remote->data.tester = remote->data.tester >> 5; remote->data.bits_left -= 5; } else if ((remote->data.tester & ONE_MASK) == ONE) { message.system = (message.system << 1) + 1; remote->data.tester = remote->data.tester >> 6; remote->data.bits_left -= 6; } else { dev_err(&remote->interface->dev, "%s - Unknown sequence found in system data.\n", __func__); remote->stage = 0; return; } } message.button = 0; for (i = 0; i < 5; i++) { keyspan_load_tester(remote, 6); if ((remote->data.tester & ZERO_MASK) == ZERO) { message.button = message.button << 1; remote->data.tester = remote->data.tester >> 5; remote->data.bits_left -= 5; } else if ((remote->data.tester & ONE_MASK) == ONE) { message.button = (message.button << 1) + 1; remote->data.tester = remote->data.tester >> 6; remote->data.bits_left -= 6; } else { dev_err(&remote->interface->dev, "%s - Unknown sequence found in button data.\n", __func__); remote->stage = 0; return; } } keyspan_load_tester(remote, 6); if ((remote->data.tester & ZERO_MASK) == ZERO) { message.toggle = 0; remote->data.tester = remote->data.tester >> 5; remote->data.bits_left -= 5; } else if ((remote->data.tester & ONE_MASK) == ONE) { message.toggle = 1; remote->data.tester = remote->data.tester >> 6; remote->data.bits_left -= 6; } else { dev_err(&remote->interface->dev, "%s - Error in message, invalid toggle.\n", __func__); remote->stage = 0; return; } keyspan_load_tester(remote, 5); if ((remote->data.tester & STOP_MASK) == STOP) { remote->data.tester = remote->data.tester >> 5; remote->data.bits_left -= 5; } else { dev_err(&remote->interface->dev, "Bad message received, no stop bit found.\n"); } dev_dbg(&remote->interface->dev, "%s found valid message: system: %d, button: %d, toggle: %d\n", __func__, message.system, message.button, message.toggle); if (message.toggle != remote->toggle) { keyspan_report_button(remote, message.button, 1); keyspan_report_button(remote, message.button, 0); remote->toggle = message.toggle; } remote->stage = 0; break; } } /* * Routine for sending all the initialization messages to the 
remote. */ static int keyspan_setup(struct usb_device* dev) { int retval = 0; retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0x11, 0x40, 0x5601, 0x0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to set bit rate due to error: %d\n", __func__, retval); return(retval); } retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0x44, 0x40, 0x0, 0x0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to set resume sensitivity due to error: %d\n", __func__, retval); return(retval); } retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0x22, 0x40, 0x0, 0x0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to turn receive on due to error: %d\n", __func__, retval); return(retval); } dev_dbg(&dev->dev, "%s - Setup complete.\n", __func__); return(retval); } /* * Routine used to handle a new message that has come in. */ static void keyspan_irq_recv(struct urb *urb) { struct usb_keyspan *dev = urb->context; int retval; /* Check our status in case we need to bail out early. */ switch (urb->status) { case 0: break; /* Device went away so don't keep trying to read from it. */ case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: return; default: goto resubmit; } if (debug) keyspan_print(dev); keyspan_check_data(dev); resubmit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&dev->interface->dev, "%s - usb_submit_urb failed with result: %d\n", __func__, retval); } static int keyspan_open(struct input_dev *dev) { struct usb_keyspan *remote = input_get_drvdata(dev); remote->irq_urb->dev = remote->udev; if (usb_submit_urb(remote->irq_urb, GFP_KERNEL)) return -EIO; return 0; } static void keyspan_close(struct input_dev *dev) { struct usb_keyspan *remote = input_get_drvdata(dev); usb_kill_urb(remote->irq_urb); } static struct usb_endpoint_descriptor *keyspan_get_in_endpoint(struct usb_host_interface *iface) { struct usb_endpoint_descriptor *endpoint; int i; for (i = 0; i < iface->desc.bNumEndpoints; ++i) { endpoint = &iface->endpoint[i].desc; if (usb_endpoint_is_int_in(endpoint)) { /* we found our interrupt in endpoint */ return endpoint; } } return NULL; } /* * Routine that sets up the driver to handle a specific USB device detected on the bus. */ static int keyspan_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct usb_endpoint_descriptor *endpoint; struct usb_keyspan *remote; struct input_dev *input_dev; int i, error; endpoint = keyspan_get_in_endpoint(interface->cur_altsetting); if (!endpoint) return -ENODEV; remote = kzalloc(sizeof(*remote), GFP_KERNEL); input_dev = input_allocate_device(); if (!remote || !input_dev) { error = -ENOMEM; goto fail1; } remote->udev = udev; remote->input = input_dev; remote->interface = interface; remote->in_endpoint = endpoint; remote->toggle = -1; /* Set to -1 so we will always not match the toggle from the first remote message. 
*/ remote->in_buffer = usb_alloc_coherent(udev, RECV_SIZE, GFP_KERNEL, &remote->in_dma); if (!remote->in_buffer) { error = -ENOMEM; goto fail1; } remote->irq_urb = usb_alloc_urb(0, GFP_KERNEL); if (!remote->irq_urb) { error = -ENOMEM; goto fail2; } error = keyspan_setup(udev); if (error) { error = -ENODEV; goto fail3; } if (udev->manufacturer) strscpy(remote->name, udev->manufacturer, sizeof(remote->name)); if (udev->product) { if (udev->manufacturer) strlcat(remote->name, " ", sizeof(remote->name)); strlcat(remote->name, udev->product, sizeof(remote->name)); } if (!strlen(remote->name)) snprintf(remote->name, sizeof(remote->name), "USB Keyspan Remote %04x:%04x", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); usb_make_path(udev, remote->phys, sizeof(remote->phys)); strlcat(remote->phys, "/input0", sizeof(remote->phys)); memcpy(remote->keymap, keyspan_key_table, sizeof(remote->keymap)); input_dev->name = remote->name; input_dev->phys = remote->phys; usb_to_input_id(udev, &input_dev->id); input_dev->dev.parent = &interface->dev; input_dev->keycode = remote->keymap; input_dev->keycodesize = sizeof(unsigned short); input_dev->keycodemax = ARRAY_SIZE(remote->keymap); input_set_capability(input_dev, EV_MSC, MSC_SCAN); __set_bit(EV_KEY, input_dev->evbit); for (i = 0; i < ARRAY_SIZE(keyspan_key_table); i++) __set_bit(keyspan_key_table[i], input_dev->keybit); __clear_bit(KEY_RESERVED, input_dev->keybit); input_set_drvdata(input_dev, remote); input_dev->open = keyspan_open; input_dev->close = keyspan_close; /* * Initialize the URB to access the device. * The urb gets sent to the device in keyspan_open() */ usb_fill_int_urb(remote->irq_urb, remote->udev, usb_rcvintpipe(remote->udev, endpoint->bEndpointAddress), remote->in_buffer, RECV_SIZE, keyspan_irq_recv, remote, endpoint->bInterval); remote->irq_urb->transfer_dma = remote->in_dma; remote->irq_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; /* we can register the device now, as it is ready */ error = input_register_device(remote->input); if (error) goto fail3; /* save our data pointer in this interface device */ usb_set_intfdata(interface, remote); return 0; fail3: usb_free_urb(remote->irq_urb); fail2: usb_free_coherent(udev, RECV_SIZE, remote->in_buffer, remote->in_dma); fail1: kfree(remote); input_free_device(input_dev); return error; } /* * Routine called when a device is disconnected from the USB. */ static void keyspan_disconnect(struct usb_interface *interface) { struct usb_keyspan *remote; remote = usb_get_intfdata(interface); usb_set_intfdata(interface, NULL); if (remote) { /* We have a valid driver structure so clean up everything we allocated. */ input_unregister_device(remote->input); usb_kill_urb(remote->irq_urb); usb_free_urb(remote->irq_urb); usb_free_coherent(remote->udev, RECV_SIZE, remote->in_buffer, remote->in_dma); kfree(remote); } } /* * Standard driver set up sections */ static struct usb_driver keyspan_driver = { .name = "keyspan_remote", .probe = keyspan_probe, .disconnect = keyspan_disconnect, .id_table = keyspan_table }; module_usb_driver(keyspan_driver); MODULE_DEVICE_TABLE(usb, keyspan_table); MODULE_AUTHOR("Michael Downey <downey@zymeta.com>"); MODULE_DESCRIPTION("Driver for the USB Keyspan remote control."); MODULE_LICENSE("GPL");
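keyspan_check_data() above turns the remote's pulse stream into bits by matching the low bits of the accumulator against the ZERO pattern (5 bits wide) or the ONE pattern (6 bits wide) and consuming the matched width. The standalone sketch below replays that classification rule on a hand-built accumulator; it is illustrative only and shares nothing with the driver beyond the two bit patterns.

/* Standalone sketch (not driver code): classify the next symbol in a
 * low-bits-first accumulator the same way keyspan_check_data() does. */
#include <stdio.h>
#include <stdint.h>

#define ZERO      0x18
#define ZERO_MASK 0x1F	/* 5 bits for a 0 */
#define ONE       0x3C
#define ONE_MASK  0x3F	/* 6 bits for a 1 */

/* Returns 0 or 1 and consumes the matching bits, or -1 on garbage. */
static int next_symbol(uint32_t *tester, int *bits_left)
{
	if ((*tester & ZERO_MASK) == ZERO) {
		*tester >>= 5;
		*bits_left -= 5;
		return 0;
	}
	if ((*tester & ONE_MASK) == ONE) {
		*tester >>= 6;
		*bits_left -= 6;
		return 1;
	}
	return -1;
}

int main(void)
{
	/* Example accumulator: a ONE (0x3C) followed by a ZERO (0x18). */
	uint32_t tester = (0x18u << 6) | 0x3Cu;
	int bits_left = 11;
	int first = next_symbol(&tester, &bits_left);
	int second = next_symbol(&tester, &bits_left);

	printf("%d %d\n", first, second);	/* prints "1 0" */
	return 0;
}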
406 299 112 112 155 154 116 116 187 187 191 191 137 137 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/cache.h> #include <linux/random.h> #include <linux/hrtimer.h> #include <linux/ktime.h> #include <linux/string.h> #include <linux/net.h> #include <linux/siphash.h> #include <net/secure_seq.h> #if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET) #include <linux/in6.h> #include <net/tcp.h> static siphash_aligned_key_t net_secret; static siphash_aligned_key_t ts_secret; #define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ) static __always_inline void net_secret_init(void) { net_get_random_once(&net_secret, sizeof(net_secret)); } static __always_inline void ts_secret_init(void) { net_get_random_once(&ts_secret, sizeof(ts_secret)); } #endif #ifdef CONFIG_INET static u32 seq_scale(u32 seq) { /* * As close as possible to RFC 793, which * suggests using a 250 kHz clock. * Further reading shows this assumes 2 Mb/s networks. * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but * we also need to limit the resolution so that the u32 seq * overlaps less than one time per MSL (2 minutes). * Choosing a clock of 64 ns period is OK. 
(period of 274 s) */ return seq + (ktime_get_real_ns() >> 6); } #endif #if IS_ENABLED(CONFIG_IPV6) u32 secure_tcpv6_ts_off(const struct net *net, const __be32 *saddr, const __be32 *daddr) { const struct { struct in6_addr saddr; struct in6_addr daddr; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, }; if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); return siphash(&combined, offsetofend(typeof(combined), daddr), &ts_secret); } EXPORT_IPV6_MOD(secure_tcpv6_ts_off); u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport) { const struct { struct in6_addr saddr; struct in6_addr daddr; __be16 sport; __be16 dport; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, .sport = sport, .dport = dport }; u32 hash; net_secret_init(); hash = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); return seq_scale(hash); } EXPORT_SYMBOL(secure_tcpv6_seq); u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) { const struct { struct in6_addr saddr; struct in6_addr daddr; unsigned int timeseed; __be16 dport; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, .dport = dport, }; net_secret_init(); return siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); } EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) { if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); return siphash_2u32((__force u32)saddr, (__force u32)daddr, &ts_secret); } /* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), * but fortunately, `sport' cannot be 0 in any circumstances. If this changes, * it would be easy enough to have the former function use siphash_4u32, passing * the arguments as separate u32. */ u32 secure_tcp_seq(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) { u32 hash; net_secret_init(); hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); return seq_scale(hash); } EXPORT_SYMBOL_GPL(secure_tcp_seq); u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { net_secret_init(); return siphash_4u32((__force u32)saddr, (__force u32)daddr, (__force u16)dport, jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, &net_secret); } EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); #endif
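seq_scale() above offsets the hash by ktime_get_real_ns() >> 6, i.e. a 64 ns tick, so the 32-bit offset wraps roughly every 274 s, comfortably above the 2-minute MSL mentioned in the comment. A trivial standalone check of that arithmetic (illustration only):

#include <stdio.h>

int main(void)
{
	double tick_ns = 64.0;	/* ktime_get_real_ns() >> 6 */
	double wrap_s = 4294967296.0 * tick_ns / 1e9;

	printf("u32 offset wrap period: %.1f s\n", wrap_s);	/* ~274.9 s */
	return 0;
}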
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_GENERIC_CHECKSUM_H
#define __ASM_GENERIC_CHECKSUM_H

#include <linux/bitops.h>

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * returns a 32-bit number suitable for feeding into itself
 * or csum_tcpudp_magic
 *
 * this function must be called with even lengths, except
 * for the last fragment, which may be odd
 *
 * it's best to have buff aligned on a 32-bit boundary
 */
extern __wsum csum_partial(const void *buff, int len, __wsum sum);

#ifndef ip_fast_csum
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 */
extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
#endif

#ifndef csum_fold
/*
 * Fold a partial checksum
 */
static inline __sum16 csum_fold(__wsum csum)
{
	u32 sum = (__force u32)csum;
	return (__force __sum16)((~sum - ror32(sum, 16)) >> 16);
}
#endif

#ifndef csum_tcpudp_nofold
/*
 * computes the checksum of the TCP/UDP pseudo-header
 * returns a 16-bit checksum, already complemented
 */
extern __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
				 __u32 len, __u8 proto, __wsum sum);
#endif

#ifndef csum_tcpudp_magic
static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
					__u32 len, __u8 proto, __wsum sum)
{
	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
}
#endif

/*
 * this routine is used for miscellaneous IP-like checksums, mainly
 * in icmp.c
 */
extern __sum16 ip_compute_csum(const void *buff, int len);

#endif /* __ASM_GENERIC_CHECKSUM_H */
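The generic csum_fold() above collapses a 32-bit partial sum into the final 16-bit Internet checksum with (~sum - ror32(sum, 16)) >> 16. The standalone sketch below compares that expression against the textbook fold (add the two 16-bit halves with end-around carry, then invert) for a few sample values; it is an illustration, not kernel code.

#include <stdio.h>
#include <stdint.h>

static uint16_t fold_textbook(uint32_t sum)
{
	sum = (sum & 0xffff) + (sum >> 16);	/* first fold */
	sum = (sum & 0xffff) + (sum >> 16);	/* absorb a possible carry */
	return (uint16_t)~sum;
}

static uint16_t fold_kernel(uint32_t sum)
{
	uint32_t ror16 = (sum >> 16) | (sum << 16);	/* ror32(sum, 16) */
	return (uint16_t)((~sum - ror16) >> 16);
}

int main(void)
{
	uint32_t samples[] = { 0x0001ffff, 0x12345678, 0xffffffff, 0 };

	for (int i = 0; i < 4; i++)
		printf("%08x -> %04x %04x\n", samples[i],
		       fold_textbook(samples[i]), fold_kernel(samples[i]));
	return 0;
}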
10 10 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 // SPDX-License-Identifier: GPL-2.0-only /* * xt_conntrack - Netfilter module to match connection tracking * information. (Superset of Rusty's minimalistic state match.) * * (C) 2001 Marc Boucher (marc@mbsi.ca). * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <net/ipv6.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_conntrack.h> #include <net/netfilter/nf_conntrack.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: connection tracking state match"); MODULE_ALIAS("ipt_conntrack"); MODULE_ALIAS("ip6t_conntrack"); static bool conntrack_addrcmp(const union nf_inet_addr *kaddr, const union nf_inet_addr *uaddr, const union nf_inet_addr *umask, unsigned int l3proto) { if (l3proto == NFPROTO_IPV4) return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0; else if (l3proto == NFPROTO_IPV6) return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6, &uaddr->in6) == 0; else return false; } static inline bool conntrack_mt_origsrc(const struct nf_conn *ct, const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, &info->origsrc_addr, &info->origsrc_mask, family); } static inline bool conntrack_mt_origdst(const struct nf_conn *ct, const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3, &info->origdst_addr, &info->origdst_mask, family); } static inline bool conntrack_mt_replsrc(const struct nf_conn *ct, const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3, &info->replsrc_addr, &info->replsrc_mask, family); } static inline bool conntrack_mt_repldst(const struct nf_conn *ct, const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3, &info->repldst_addr, &info->repldst_mask, family); } static inline bool ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, const struct nf_conn *ct) { const struct nf_conntrack_tuple *tuple; tuple = 
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; if ((info->match_flags & XT_CONNTRACK_PROTO) && (nf_ct_protonum(ct) == info->l4proto) ^ !(info->invert_flags & XT_CONNTRACK_PROTO)) return false; /* Shortcut to match all recognized protocols by using ->src.all. */ if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) && (tuple->src.u.all == info->origsrc_port) ^ !(info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT)) return false; if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) && (tuple->dst.u.all == info->origdst_port) ^ !(info->invert_flags & XT_CONNTRACK_ORIGDST_PORT)) return false; tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) && (tuple->src.u.all == info->replsrc_port) ^ !(info->invert_flags & XT_CONNTRACK_REPLSRC_PORT)) return false; if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) && (tuple->dst.u.all == info->repldst_port) ^ !(info->invert_flags & XT_CONNTRACK_REPLDST_PORT)) return false; return true; } static inline bool port_match(u16 min, u16 max, u16 port, bool invert) { return (port >= min && port <= max) ^ invert; } static inline bool ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info, const struct nf_conn *ct) { const struct nf_conntrack_tuple *tuple; tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; if ((info->match_flags & XT_CONNTRACK_PROTO) && (nf_ct_protonum(ct) == info->l4proto) ^ !(info->invert_flags & XT_CONNTRACK_PROTO)) return false; /* Shortcut to match all recognized protocols by using ->src.all. */ if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) && !port_match(info->origsrc_port, info->origsrc_port_high, ntohs(tuple->src.u.all), info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT)) return false; if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) && !port_match(info->origdst_port, info->origdst_port_high, ntohs(tuple->dst.u.all), info->invert_flags & XT_CONNTRACK_ORIGDST_PORT)) return false; tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) && !port_match(info->replsrc_port, info->replsrc_port_high, ntohs(tuple->src.u.all), info->invert_flags & XT_CONNTRACK_REPLSRC_PORT)) return false; if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) && !port_match(info->repldst_port, info->repldst_port_high, ntohs(tuple->dst.u.all), info->invert_flags & XT_CONNTRACK_REPLDST_PORT)) return false; return true; } static bool conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, u16 state_mask, u16 status_mask) { const struct xt_conntrack_mtinfo2 *info = par->matchinfo; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int statebit; ct = nf_ct_get(skb, &ctinfo); if (ct) statebit = XT_CONNTRACK_STATE_BIT(ctinfo); else if (ctinfo == IP_CT_UNTRACKED) statebit = XT_CONNTRACK_STATE_UNTRACKED; else statebit = XT_CONNTRACK_STATE_INVALID; if (info->match_flags & XT_CONNTRACK_STATE) { if (ct != NULL) { if (test_bit(IPS_SRC_NAT_BIT, &ct->status)) statebit |= XT_CONNTRACK_STATE_SNAT; if (test_bit(IPS_DST_NAT_BIT, &ct->status)) statebit |= XT_CONNTRACK_STATE_DNAT; } if (!!(state_mask & statebit) ^ !(info->invert_flags & XT_CONNTRACK_STATE)) return false; } if (ct == NULL) return info->match_flags & XT_CONNTRACK_STATE; if ((info->match_flags & XT_CONNTRACK_DIRECTION) && (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^ !(info->invert_flags & XT_CONNTRACK_DIRECTION)) return false; if (info->match_flags & XT_CONNTRACK_ORIGSRC) if (conntrack_mt_origsrc(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_ORIGSRC)) return false; if (info->match_flags & 
XT_CONNTRACK_ORIGDST) if (conntrack_mt_origdst(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_ORIGDST)) return false; if (info->match_flags & XT_CONNTRACK_REPLSRC) if (conntrack_mt_replsrc(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_REPLSRC)) return false; if (info->match_flags & XT_CONNTRACK_REPLDST) if (conntrack_mt_repldst(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_REPLDST)) return false; if (par->match->revision != 3) { if (!ct_proto_port_check(info, ct)) return false; } else { if (!ct_proto_port_check_v3(par->matchinfo, ct)) return false; } if ((info->match_flags & XT_CONNTRACK_STATUS) && (!!(status_mask & ct->status) ^ !(info->invert_flags & XT_CONNTRACK_STATUS))) return false; if (info->match_flags & XT_CONNTRACK_EXPIRES) { unsigned long expires = nf_ct_expires(ct) / HZ; if ((expires >= info->expires_min && expires <= info->expires_max) ^ !(info->invert_flags & XT_CONNTRACK_EXPIRES)) return false; } return true; } static bool conntrack_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_conntrack_mtinfo1 *info = par->matchinfo; return conntrack_mt(skb, par, info->state_mask, info->status_mask); } static bool conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_conntrack_mtinfo2 *info = par->matchinfo; return conntrack_mt(skb, par, info->state_mask, info->status_mask); } static bool conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_conntrack_mtinfo3 *info = par->matchinfo; return conntrack_mt(skb, par, info->state_mask, info->status_mask); } static int conntrack_mt_check(const struct xt_mtchk_param *par) { int ret; ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info_ratelimited("cannot load conntrack support for proto=%u\n", par->family); return ret; } static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_netns_put(par->net, par->family); } static struct xt_match conntrack_mt_reg[] __read_mostly = { { .name = "conntrack", .revision = 1, .family = NFPROTO_UNSPEC, .matchsize = sizeof(struct xt_conntrack_mtinfo1), .match = conntrack_mt_v1, .checkentry = conntrack_mt_check, .destroy = conntrack_mt_destroy, .me = THIS_MODULE, }, { .name = "conntrack", .revision = 2, .family = NFPROTO_UNSPEC, .matchsize = sizeof(struct xt_conntrack_mtinfo2), .match = conntrack_mt_v2, .checkentry = conntrack_mt_check, .destroy = conntrack_mt_destroy, .me = THIS_MODULE, }, { .name = "conntrack", .revision = 3, .family = NFPROTO_UNSPEC, .matchsize = sizeof(struct xt_conntrack_mtinfo3), .match = conntrack_mt_v3, .checkentry = conntrack_mt_check, .destroy = conntrack_mt_destroy, .me = THIS_MODULE, }, }; static int __init conntrack_mt_init(void) { return xt_register_matches(conntrack_mt_reg, ARRAY_SIZE(conntrack_mt_reg)); } static void __exit conntrack_mt_exit(void) { xt_unregister_matches(conntrack_mt_reg, ARRAY_SIZE(conntrack_mt_reg)); } module_init(conntrack_mt_init); module_exit(conntrack_mt_exit);
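Most checks in ct_proto_port_check() and conntrack_mt() above follow the same pattern: a test is applied only when its bit is set in match_flags, and its outcome is XORed with the negated invert_flags bit, so one expression handles both the plain and the inverted ("!") form of an iptables option. A small standalone sketch of that pattern follows (with explicit parentheses; in the kernel source the grouping comes from ^ binding tighter than &&). FLAG_PROTO merely stands in for XT_CONNTRACK_PROTO here.

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define FLAG_PROTO 0x01	/* stands in for XT_CONNTRACK_PROTO */

/* true means "reject this packet" */
static bool rejects(uint16_t match_flags, uint16_t invert_flags, bool cond)
{
	return (match_flags & FLAG_PROTO) &&
	       ((cond) ^ !(invert_flags & FLAG_PROTO));
}

int main(void)
{
	/* flag set, not inverted: reject only when the condition fails */
	printf("%d %d\n", rejects(FLAG_PROTO, 0, true),
	       rejects(FLAG_PROTO, 0, false));			/* 0 1 */
	/* flag set, inverted: reject only when the condition holds */
	printf("%d %d\n", rejects(FLAG_PROTO, FLAG_PROTO, true),
	       rejects(FLAG_PROTO, FLAG_PROTO, false));		/* 1 0 */
	return 0;
}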
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * fs/eventpoll.c (Efficient event retrieval implementation)
 * Copyright (C) 2001,...,2009 Davide Libenzi
 *
 * Davide Libenzi <davidel@xmailserver.org>
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/spinlock.h>
#include <linux/syscalls.h>
#include <linux/rbtree.h>
#include <linux/wait.h>
#include <linux/eventpoll.h>
#include <linux/mount.h>
#include <linux/bitops.h>
#include <linux/mutex.h>
#include <linux/anon_inodes.h>
#include <linux/device.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/mman.h>
#include <linux/atomic.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include
<linux/compat.h> #include <linux/rculist.h> #include <linux/capability.h> #include <net/busy_poll.h> /* * LOCKING: * There are three level of locking required by epoll : * * 1) epnested_mutex (mutex) * 2) ep->mtx (mutex) * 3) ep->lock (rwlock) * * The acquire order is the one listed above, from 1 to 3. * We need a rwlock (ep->lock) because we manipulate objects * from inside the poll callback, that might be triggered from * a wake_up() that in turn might be called from IRQ context. * So we can't sleep inside the poll callback and hence we need * a spinlock. During the event transfer loop (from kernel to * user space) we could end up sleeping due a copy_to_user(), so * we need a lock that will allow us to sleep. This lock is a * mutex (ep->mtx). It is acquired during the event transfer loop, * during epoll_ctl(EPOLL_CTL_DEL) and during eventpoll_release_file(). * The epnested_mutex is acquired when inserting an epoll fd onto another * epoll fd. We do this so that we walk the epoll tree and ensure that this * insertion does not create a cycle of epoll file descriptors, which * could lead to deadlock. We need a global mutex to prevent two * simultaneous inserts (A into B and B into A) from racing and * constructing a cycle without either insert observing that it is * going to. * It is necessary to acquire multiple "ep->mtx"es at once in the * case when one epoll fd is added to another. In this case, we * always acquire the locks in the order of nesting (i.e. after * epoll_ctl(e1, EPOLL_CTL_ADD, e2), e1->mtx will always be acquired * before e2->mtx). Since we disallow cycles of epoll file * descriptors, this ensures that the mutexes are well-ordered. In * order to communicate this nesting to lockdep, when walking a tree * of epoll file descriptors, we use the current recursion depth as * the lockdep subkey. * It is possible to drop the "ep->mtx" and to use the global * mutex "epnested_mutex" (together with "ep->lock") to have it working, * but having "ep->mtx" will make the interface more scalable. * Events that require holding "epnested_mutex" are very rare, while for * normal operations the epoll private "ep->mtx" will guarantee * a better scalability. */ /* Epoll private bits inside the event mask */ #define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE) #define EPOLLINOUT_BITS (EPOLLIN | EPOLLOUT) #define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | EPOLLERR | EPOLLHUP | \ EPOLLWAKEUP | EPOLLET | EPOLLEXCLUSIVE) /* Maximum number of nesting allowed inside epoll sets */ #define EP_MAX_NESTS 4 #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) #define EP_UNACTIVE_PTR ((void *) -1L) #define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry)) struct epoll_filefd { struct file *file; int fd; } __packed; /* Wait structure used by the poll hooks */ struct eppoll_entry { /* List header used to link this structure to the "struct epitem" */ struct eppoll_entry *next; /* The "base" pointer is set to the container "struct epitem" */ struct epitem *base; /* * Wait queue item that will be linked to the target file wait * queue head. */ wait_queue_entry_t wait; /* The wait queue head that linked the "wait" wait queue item */ wait_queue_head_t *whead; }; /* * Each file descriptor added to the eventpoll interface will * have an entry of this type linked to the "rbr" RB tree. * Avoid increasing the size of this struct, there can be many thousands * of these on a server and we do not want this to take another cache line. 
*/ struct epitem { union { /* RB tree node links this structure to the eventpoll RB tree */ struct rb_node rbn; /* Used to free the struct epitem */ struct rcu_head rcu; }; /* List header used to link this structure to the eventpoll ready list */ struct list_head rdllink; /* * Works together "struct eventpoll"->ovflist in keeping the * single linked chain of items. */ struct epitem *next; /* The file descriptor information this item refers to */ struct epoll_filefd ffd; /* * Protected by file->f_lock, true for to-be-released epitem already * removed from the "struct file" items list; together with * eventpoll->refcount orchestrates "struct eventpoll" disposal */ bool dying; /* List containing poll wait queues */ struct eppoll_entry *pwqlist; /* The "container" of this item */ struct eventpoll *ep; /* List header used to link this item to the "struct file" items list */ struct hlist_node fllink; /* wakeup_source used when EPOLLWAKEUP is set */ struct wakeup_source __rcu *ws; /* The structure that describe the interested events and the source fd */ struct epoll_event event; }; /* * This structure is stored inside the "private_data" member of the file * structure and represents the main data structure for the eventpoll * interface. */ struct eventpoll { /* * This mutex is used to ensure that files are not removed * while epoll is using them. This is held during the event * collection loop, the file cleanup path, the epoll file exit * code and the ctl operations. */ struct mutex mtx; /* Wait queue used by sys_epoll_wait() */ wait_queue_head_t wq; /* Wait queue used by file->poll() */ wait_queue_head_t poll_wait; /* List of ready file descriptors */ struct list_head rdllist; /* Lock which protects rdllist and ovflist */ rwlock_t lock; /* RB tree root used to store monitored fd structs */ struct rb_root_cached rbr; /* * This is a single linked list that chains all the "struct epitem" that * happened while transferring ready events to userspace w/out * holding ->lock. */ struct epitem *ovflist; /* wakeup_source used when ep_send_events or __ep_eventpoll_poll is running */ struct wakeup_source *ws; /* The user that created the eventpoll descriptor */ struct user_struct *user; struct file *file; /* used to optimize loop detection check */ u64 gen; struct hlist_head refs; /* * usage count, used together with epitem->dying to * orchestrate the disposal of this struct */ refcount_t refcount; #ifdef CONFIG_NET_RX_BUSY_POLL /* used to track busy poll napi_id */ unsigned int napi_id; /* busy poll timeout */ u32 busy_poll_usecs; /* busy poll packet budget */ u16 busy_poll_budget; bool prefer_busy_poll; #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC /* tracks wakeup nests for lockdep validation */ u8 nests; #endif }; /* Wrapper struct used by poll queueing */ struct ep_pqueue { poll_table pt; struct epitem *epi; }; /* * Configuration options available inside /proc/sys/fs/epoll/ */ /* Maximum number of epoll watched descriptors, per user */ static long max_user_watches __read_mostly; /* Used for cycles detection */ static DEFINE_MUTEX(epnested_mutex); static u64 loop_check_gen = 0; /* Used to check for epoll file descriptor inclusion loops */ static struct eventpoll *inserting_into; /* Slab cache used to allocate "struct epitem" */ static struct kmem_cache *epi_cache __ro_after_init; /* Slab cache used to allocate "struct eppoll_entry" */ static struct kmem_cache *pwq_cache __ro_after_init; /* * List of files with newly added links, where we may need to limit the number * of emanating paths. 
Protected by the epnested_mutex. */ struct epitems_head { struct hlist_head epitems; struct epitems_head *next; }; static struct epitems_head *tfile_check_list = EP_UNACTIVE_PTR; static struct kmem_cache *ephead_cache __ro_after_init; static inline void free_ephead(struct epitems_head *head) { if (head) kmem_cache_free(ephead_cache, head); } static void list_file(struct file *file) { struct epitems_head *head; head = container_of(file->f_ep, struct epitems_head, epitems); if (!head->next) { head->next = tfile_check_list; tfile_check_list = head; } } static void unlist_file(struct epitems_head *head) { struct epitems_head *to_free = head; struct hlist_node *p = rcu_dereference(hlist_first_rcu(&head->epitems)); if (p) { struct epitem *epi= container_of(p, struct epitem, fllink); spin_lock(&epi->ffd.file->f_lock); if (!hlist_empty(&head->epitems)) to_free = NULL; head->next = NULL; spin_unlock(&epi->ffd.file->f_lock); } free_ephead(to_free); } #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> static long long_zero; static long long_max = LONG_MAX; static const struct ctl_table epoll_table[] = { { .procname = "max_user_watches", .data = &max_user_watches, .maxlen = sizeof(max_user_watches), .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra1 = &long_zero, .extra2 = &long_max, }, }; static void __init epoll_sysctls_init(void) { register_sysctl("fs/epoll", epoll_table); } #else #define epoll_sysctls_init() do { } while (0) #endif /* CONFIG_SYSCTL */ static const struct file_operations eventpoll_fops; static inline int is_file_epoll(struct file *f) { return f->f_op == &eventpoll_fops; } /* Setup the structure that is used as key for the RB tree */ static inline void ep_set_ffd(struct epoll_filefd *ffd, struct file *file, int fd) { ffd->file = file; ffd->fd = fd; } /* Compare RB tree keys */ static inline int ep_cmp_ffd(struct epoll_filefd *p1, struct epoll_filefd *p2) { return (p1->file > p2->file ? +1: (p1->file < p2->file ? -1 : p1->fd - p2->fd)); } /* Tells us if the item is currently linked */ static inline int ep_is_linked(struct epitem *epi) { return !list_empty(&epi->rdllink); } static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p) { return container_of(p, struct eppoll_entry, wait); } /* Get the "struct epitem" from a wait queue pointer */ static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p) { return container_of(p, struct eppoll_entry, wait)->base; } /** * ep_events_available - Checks if ready events might be available. * * @ep: Pointer to the eventpoll context. * * Return: a value different than %zero if ready events are available, * or %zero otherwise. */ static inline int ep_events_available(struct eventpoll *ep) { return !list_empty_careful(&ep->rdllist) || READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR; } #ifdef CONFIG_NET_RX_BUSY_POLL /** * busy_loop_ep_timeout - check if busy poll has timed out. The timeout value * from the epoll instance ep is preferred, but if it is not set fallback to * the system-wide global via busy_loop_timeout. * * @start_time: The start time used to compute the remaining time until timeout. * @ep: Pointer to the eventpoll context. * * Return: true if the timeout has expired, false otherwise. 
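 *
 * Illustrative user-space sketch (not part of this file): the per-epoll busy
 * poll parameters referenced here (ep->busy_poll_usecs and friends) are
 * configured through the EPIOCSPARAMS ioctl handled further down; struct
 * epoll_params and the EPIOC* ioctls come from the uapi <linux/eventpoll.h>,
 * and "epfd" is a hypothetical epoll descriptor:
 *
 *	struct epoll_params params = {
 *		.busy_poll_usecs  = 64,	(must be <= S32_MAX)
 *		.busy_poll_budget = 8,	(values above NAPI_POLL_WEIGHT need CAP_NET_ADMIN)
 *		.prefer_busy_poll = 1,	(0 or 1)
 *		.__pad            = 0,	(must be zero)
 *	};
 *	if (ioctl(epfd, EPIOCSPARAMS, &params) < 0)
 *		perror("EPIOCSPARAMS");
 *
 * EPIOCGPARAMS reads the current settings back in the same layout.
 *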
*/ static bool busy_loop_ep_timeout(unsigned long start_time, struct eventpoll *ep) { unsigned long bp_usec = READ_ONCE(ep->busy_poll_usecs); if (bp_usec) { unsigned long end_time = start_time + bp_usec; unsigned long now = busy_loop_current_time(); return time_after(now, end_time); } else { return busy_loop_timeout(start_time); } } static bool ep_busy_loop_on(struct eventpoll *ep) { return !!READ_ONCE(ep->busy_poll_usecs) || READ_ONCE(ep->prefer_busy_poll) || net_busy_loop_on(); } static bool ep_busy_loop_end(void *p, unsigned long start_time) { struct eventpoll *ep = p; return ep_events_available(ep) || busy_loop_ep_timeout(start_time, ep); } /* * Busy poll if globally on and supporting sockets found && no events, * busy loop will return if need_resched or ep_events_available. * * we must do our busy polling with irqs enabled */ static bool ep_busy_loop(struct eventpoll *ep) { unsigned int napi_id = READ_ONCE(ep->napi_id); u16 budget = READ_ONCE(ep->busy_poll_budget); bool prefer_busy_poll = READ_ONCE(ep->prefer_busy_poll); if (!budget) budget = BUSY_POLL_BUDGET; if (napi_id_valid(napi_id) && ep_busy_loop_on(ep)) { napi_busy_loop(napi_id, ep_busy_loop_end, ep, prefer_busy_poll, budget); if (ep_events_available(ep)) return true; /* * Busy poll timed out. Drop NAPI ID for now, we can add * it back in when we have moved a socket with a valid NAPI * ID onto the ready list. */ if (prefer_busy_poll) napi_resume_irqs(napi_id); ep->napi_id = 0; return false; } return false; } /* * Set epoll busy poll NAPI ID from sk. */ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) { struct eventpoll *ep = epi->ep; unsigned int napi_id; struct socket *sock; struct sock *sk; if (!ep_busy_loop_on(ep)) return; sock = sock_from_file(epi->ffd.file); if (!sock) return; sk = sock->sk; if (!sk) return; napi_id = READ_ONCE(sk->sk_napi_id); /* Non-NAPI IDs can be rejected * or * Nothing to do if we already have this ID */ if (!napi_id_valid(napi_id) || napi_id == ep->napi_id) return; /* record NAPI ID for use in next busy poll */ ep->napi_id = napi_id; } static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct eventpoll *ep = file->private_data; void __user *uarg = (void __user *)arg; struct epoll_params epoll_params; switch (cmd) { case EPIOCSPARAMS: if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params))) return -EFAULT; /* pad byte must be zero */ if (epoll_params.__pad) return -EINVAL; if (epoll_params.busy_poll_usecs > S32_MAX) return -EINVAL; if (epoll_params.prefer_busy_poll > 1) return -EINVAL; if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT && !capable(CAP_NET_ADMIN)) return -EPERM; WRITE_ONCE(ep->busy_poll_usecs, epoll_params.busy_poll_usecs); WRITE_ONCE(ep->busy_poll_budget, epoll_params.busy_poll_budget); WRITE_ONCE(ep->prefer_busy_poll, epoll_params.prefer_busy_poll); return 0; case EPIOCGPARAMS: memset(&epoll_params, 0, sizeof(epoll_params)); epoll_params.busy_poll_usecs = READ_ONCE(ep->busy_poll_usecs); epoll_params.busy_poll_budget = READ_ONCE(ep->busy_poll_budget); epoll_params.prefer_busy_poll = READ_ONCE(ep->prefer_busy_poll); if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params))) return -EFAULT; return 0; default: return -ENOIOCTLCMD; } } static void ep_suspend_napi_irqs(struct eventpoll *ep) { unsigned int napi_id = READ_ONCE(ep->napi_id); if (napi_id_valid(napi_id) && READ_ONCE(ep->prefer_busy_poll)) napi_suspend_irqs(napi_id); } static void ep_resume_napi_irqs(struct eventpoll *ep) { unsigned int napi_id = 
READ_ONCE(ep->napi_id); if (napi_id_valid(napi_id) && READ_ONCE(ep->prefer_busy_poll)) napi_resume_irqs(napi_id); } #else static inline bool ep_busy_loop(struct eventpoll *ep) { return false; } static inline void ep_set_busy_poll_napi_id(struct epitem *epi) { } static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return -EOPNOTSUPP; } static void ep_suspend_napi_irqs(struct eventpoll *ep) { } static void ep_resume_napi_irqs(struct eventpoll *ep) { } #endif /* CONFIG_NET_RX_BUSY_POLL */ /* * As described in commit 0ccf831cb lockdep: annotate epoll * the use of wait queues used by epoll is done in a very controlled * manner. Wake ups can nest inside each other, but are never done * with the same locking. For example: * * dfd = socket(...); * efd1 = epoll_create(); * efd2 = epoll_create(); * epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...); * epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...); * * When a packet arrives to the device underneath "dfd", the net code will * issue a wake_up() on its poll wake list. Epoll (efd1) has installed a * callback wakeup entry on that queue, and the wake_up() performed by the * "dfd" net code will end up in ep_poll_callback(). At this point epoll * (efd1) notices that it may have some event ready, so it needs to wake up * the waiters on its poll wait list (efd2). So it calls ep_poll_safewake() * that ends up in another wake_up(), after having checked about the * recursion constraints. That are, no more than EP_MAX_NESTS, to avoid * stack blasting. * * When CONFIG_DEBUG_LOCK_ALLOC is enabled, make sure lockdep can handle * this special case of epoll. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, unsigned pollflags) { struct eventpoll *ep_src; unsigned long flags; u8 nests = 0; /* * To set the subclass or nesting level for spin_lock_irqsave_nested() * it might be natural to create a per-cpu nest count. However, since * we can recurse on ep->poll_wait.lock, and a non-raw spinlock can * schedule() in the -rt kernel, the per-cpu variable are no longer * protected. Thus, we are introducing a per eventpoll nest field. * If we are not being call from ep_poll_callback(), epi is NULL and * we are at the first level of nesting, 0. Otherwise, we are being * called from ep_poll_callback() and if a previous wakeup source is * not an epoll file itself, we are at depth 1 since the wakeup source * is depth 0. If the wakeup source is a previous epoll file in the * wakeup chain then we use its nests value and record ours as * nests + 1. The previous epoll file nests value is stable since its * already holding its own poll_wait.lock. */ if (epi) { if ((is_file_epoll(epi->ffd.file))) { ep_src = epi->ffd.file->private_data; nests = ep_src->nests; } else { nests = 1; } } spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests); ep->nests = nests + 1; wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags); ep->nests = 0; spin_unlock_irqrestore(&ep->poll_wait.lock, flags); } #else static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, __poll_t pollflags) { wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags); } #endif static void ep_remove_wait_queue(struct eppoll_entry *pwq) { wait_queue_head_t *whead; rcu_read_lock(); /* * If it is cleared by POLLFREE, it should be rcu-safe. * If we read NULL we need a barrier paired with * smp_store_release() in ep_poll_callback(), otherwise * we rely on whead->lock. 
*/ whead = smp_load_acquire(&pwq->whead); if (whead) remove_wait_queue(whead, &pwq->wait); rcu_read_unlock(); } /* * This function unregisters poll callbacks from the associated file * descriptor. Must be called with "mtx" held. */ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) { struct eppoll_entry **p = &epi->pwqlist; struct eppoll_entry *pwq; while ((pwq = *p) != NULL) { *p = pwq->next; ep_remove_wait_queue(pwq); kmem_cache_free(pwq_cache, pwq); } } /* call only when ep->mtx is held */ static inline struct wakeup_source *ep_wakeup_source(struct epitem *epi) { return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx)); } /* call only when ep->mtx is held */ static inline void ep_pm_stay_awake(struct epitem *epi) { struct wakeup_source *ws = ep_wakeup_source(epi); if (ws) __pm_stay_awake(ws); } static inline bool ep_has_wakeup_source(struct epitem *epi) { return rcu_access_pointer(epi->ws) ? true : false; } /* call when ep->mtx cannot be held (ep_poll_callback) */ static inline void ep_pm_stay_awake_rcu(struct epitem *epi) { struct wakeup_source *ws; rcu_read_lock(); ws = rcu_dereference(epi->ws); if (ws) __pm_stay_awake(ws); rcu_read_unlock(); } /* * ep->mutex needs to be held because we could be hit by * eventpoll_release_file() and epoll_ctl(). */ static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist) { /* * Steal the ready list, and re-init the original one to the * empty list. Also, set ep->ovflist to NULL so that events * happening while looping w/out locks, are not lost. We cannot * have the poll callback to queue directly on ep->rdllist, * because we want the "sproc" callback to be able to do it * in a lockless way. */ lockdep_assert_irqs_enabled(); write_lock_irq(&ep->lock); list_splice_init(&ep->rdllist, txlist); WRITE_ONCE(ep->ovflist, NULL); write_unlock_irq(&ep->lock); } static void ep_done_scan(struct eventpoll *ep, struct list_head *txlist) { struct epitem *epi, *nepi; write_lock_irq(&ep->lock); /* * During the time we spent inside the "sproc" callback, some * other events might have been queued by the poll callback. * We re-insert them inside the main ready-list here. */ for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL; nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { /* * We need to check if the item is already in the list. * During the "sproc" callback execution time, items are * queued into ->ovflist but the "txlist" might already * contain them, and the list_splice() below takes care of them. */ if (!ep_is_linked(epi)) { /* * ->ovflist is LIFO, so we have to reverse it in order * to keep in FIFO. */ list_add(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); } } /* * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after * releasing the lock, events will be queued in the normal way inside * ep->rdllist. */ WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR); /* * Quickly re-inject items left on "txlist". 
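 *
 * Worked example for the LIFO/FIFO note above (illustrative, not from the
 * original source): if events for items A, B and C arrive in that order while
 * the scan is running, the poll callback pushes each onto the head of
 * ->ovflist (see chain_epi_lockless() further down), leaving C -> B -> A.
 * The loop above then walks C, B, A and list_add()s each at the head of
 * ->rdllist, which restores the order A, B, C, so userspace still sees the
 * events in FIFO order.
 *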
*/ list_splice(txlist, &ep->rdllist); __pm_relax(ep->ws); if (!list_empty(&ep->rdllist)) { if (waitqueue_active(&ep->wq)) wake_up(&ep->wq); } write_unlock_irq(&ep->lock); } static void ep_get(struct eventpoll *ep) { refcount_inc(&ep->refcount); } /* * Returns true if the event poll can be disposed */ static bool ep_refcount_dec_and_test(struct eventpoll *ep) { if (!refcount_dec_and_test(&ep->refcount)) return false; WARN_ON_ONCE(!RB_EMPTY_ROOT(&ep->rbr.rb_root)); return true; } static void ep_free(struct eventpoll *ep) { ep_resume_napi_irqs(ep); mutex_destroy(&ep->mtx); free_uid(ep->user); wakeup_source_unregister(ep->ws); kfree(ep); } /* * Removes a "struct epitem" from the eventpoll RB tree and deallocates * all the associated resources. Must be called with "mtx" held. * If the dying flag is set, do the removal only if force is true. * This prevents ep_clear_and_put() from dropping all the ep references * while running concurrently with eventpoll_release_file(). * Returns true if the eventpoll can be disposed. */ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) { struct file *file = epi->ffd.file; struct epitems_head *to_free; struct hlist_head *head; lockdep_assert_irqs_enabled(); /* * Removes poll wait queue hooks. */ ep_unregister_pollwait(ep, epi); /* Remove the current item from the list of epoll hooks */ spin_lock(&file->f_lock); if (epi->dying && !force) { spin_unlock(&file->f_lock); return false; } to_free = NULL; head = file->f_ep; if (head->first == &epi->fllink && !epi->fllink.next) { /* See eventpoll_release() for details. */ WRITE_ONCE(file->f_ep, NULL); if (!is_file_epoll(file)) { struct epitems_head *v; v = container_of(head, struct epitems_head, epitems); if (!smp_load_acquire(&v->next)) to_free = v; } } hlist_del_rcu(&epi->fllink); spin_unlock(&file->f_lock); free_ephead(to_free); rb_erase_cached(&epi->rbn, &ep->rbr); write_lock_irq(&ep->lock); if (ep_is_linked(epi)) list_del_init(&epi->rdllink); write_unlock_irq(&ep->lock); wakeup_source_unregister(ep_wakeup_source(epi)); /* * At this point it is safe to free the eventpoll item. Use the union * field epi->rcu, since we are trying to minimize the size of * 'struct epitem'. The 'rbn' field is no longer in use. Protected by * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make * use of the rbn field. */ kfree_rcu(epi, rcu); percpu_counter_dec(&ep->user->epoll_watches); return true; } /* * ep_remove variant for callers owing an additional reference to the ep */ static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi) { if (__ep_remove(ep, epi, false)) WARN_ON_ONCE(ep_refcount_dec_and_test(ep)); } static void ep_clear_and_put(struct eventpoll *ep) { struct rb_node *rbp, *next; struct epitem *epi; /* We need to release all tasks waiting for these file */ if (waitqueue_active(&ep->poll_wait)) ep_poll_safewake(ep, NULL, 0); mutex_lock(&ep->mtx); /* * Walks through the whole tree by unregistering poll callbacks. */ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); ep_unregister_pollwait(ep, epi); cond_resched(); } /* * Walks through the whole tree and try to free each "struct epitem". * Note that ep_remove_safe() will not remove the epitem in case of a * racing eventpoll_release_file(); the latter will do the removal. * At this point we are sure no poll callbacks will be lingering around. * Since we still own a reference to the eventpoll struct, the loop can't * dispose it. 
*/ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = next) { next = rb_next(rbp); epi = rb_entry(rbp, struct epitem, rbn); ep_remove_safe(ep, epi); cond_resched(); } mutex_unlock(&ep->mtx); if (ep_refcount_dec_and_test(ep)) ep_free(ep); } static long ep_eventpoll_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int ret; if (!is_file_epoll(file)) return -EINVAL; switch (cmd) { case EPIOCSPARAMS: case EPIOCGPARAMS: ret = ep_eventpoll_bp_ioctl(file, cmd, arg); break; default: ret = -EINVAL; break; } return ret; } static int ep_eventpoll_release(struct inode *inode, struct file *file) { struct eventpoll *ep = file->private_data; if (ep) ep_clear_and_put(ep); return 0; } static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth); static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int depth) { struct eventpoll *ep = file->private_data; LIST_HEAD(txlist); struct epitem *epi, *tmp; poll_table pt; __poll_t res = 0; init_poll_funcptr(&pt, NULL); /* Insert inside our poll wait queue */ poll_wait(file, &ep->poll_wait, wait); /* * Proceed to find out if wanted events are really available inside * the ready list. */ mutex_lock_nested(&ep->mtx, depth); ep_start_scan(ep, &txlist); list_for_each_entry_safe(epi, tmp, &txlist, rdllink) { if (ep_item_poll(epi, &pt, depth + 1)) { res = EPOLLIN | EPOLLRDNORM; break; } else { /* * Item has been dropped into the ready list by the poll * callback, but it's not actually ready, as far as * caller requested events goes. We can remove it here. */ __pm_relax(ep_wakeup_source(epi)); list_del_init(&epi->rdllink); } } ep_done_scan(ep, &txlist); mutex_unlock(&ep->mtx); return res; } /* * The ffd.file pointer may be in the process of being torn down due to * being closed, but we may not have finished eventpoll_release() yet. * * Normally, even with the atomic_long_inc_not_zero, the file may have * been free'd and then gotten re-allocated to something else (since * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU). * * But for epoll, users hold the ep->mtx mutex, and as such any file in * the process of being free'd will block in eventpoll_release_file() * and thus the underlying file allocation will not be free'd, and the * file re-use cannot happen. * * For the same reason we can avoid a rcu_read_lock() around the * operation - 'ffd.file' cannot go away even if the refcount has * reached zero (but we must still not call out to ->poll() functions * etc). */ static struct file *epi_fget(const struct epitem *epi) { struct file *file; file = epi->ffd.file; if (!file_ref_get(&file->f_ref)) file = NULL; return file; } /* * Differs from ep_eventpoll_poll() in that internal callers already have * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested() * is correctly annotated. */ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth) { struct file *file = epi_fget(epi); __poll_t res; /* * We could return EPOLLERR | EPOLLHUP or something, but let's * treat this more as "file doesn't exist, poll didn't happen". 
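 *
 * Illustrative note (not from the original source): the branch below calls
 * __ep_eventpoll_poll() because an epoll file is itself pollable, so it can
 * be watched with poll()/select() or nested inside another epoll instance.
 * A hypothetical user-space sketch, with "epfd" an existing epoll fd:
 *
 *	struct pollfd pfd = { .fd = epfd, .events = POLLIN };
 *	int n = poll(&pfd, 1, -1);
 *	(POLLIN is reported once any descriptor monitored by epfd is ready)
 *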
*/ if (!file) return 0; pt->_key = epi->event.events; if (!is_file_epoll(file)) res = vfs_poll(file, pt); else res = __ep_eventpoll_poll(file, pt, depth); fput(file); return res & epi->event.events; } static __poll_t ep_eventpoll_poll(struct file *file, poll_table *wait) { return __ep_eventpoll_poll(file, wait, 0); } #ifdef CONFIG_PROC_FS static void ep_show_fdinfo(struct seq_file *m, struct file *f) { struct eventpoll *ep = f->private_data; struct rb_node *rbp; mutex_lock(&ep->mtx); for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { struct epitem *epi = rb_entry(rbp, struct epitem, rbn); struct inode *inode = file_inode(epi->ffd.file); seq_printf(m, "tfd: %8d events: %8x data: %16llx " " pos:%lli ino:%lx sdev:%x\n", epi->ffd.fd, epi->event.events, (long long)epi->event.data, (long long)epi->ffd.file->f_pos, inode->i_ino, inode->i_sb->s_dev); if (seq_has_overflowed(m)) break; } mutex_unlock(&ep->mtx); } #endif /* File callbacks that implement the eventpoll file behaviour */ static const struct file_operations eventpoll_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = ep_show_fdinfo, #endif .release = ep_eventpoll_release, .poll = ep_eventpoll_poll, .llseek = noop_llseek, .unlocked_ioctl = ep_eventpoll_ioctl, .compat_ioctl = compat_ptr_ioctl, }; /* * This is called from eventpoll_release() to unlink files from the eventpoll * interface. We need to have this facility to cleanup correctly files that are * closed without being removed from the eventpoll interface. */ void eventpoll_release_file(struct file *file) { struct eventpoll *ep; struct epitem *epi; bool dispose; /* * Use the 'dying' flag to prevent a concurrent ep_clear_and_put() from * touching the epitems list before eventpoll_release_file() can access * the ep->mtx. */ again: spin_lock(&file->f_lock); if (file->f_ep && file->f_ep->first) { epi = hlist_entry(file->f_ep->first, struct epitem, fllink); epi->dying = true; spin_unlock(&file->f_lock); /* * ep access is safe as we still own a reference to the ep * struct */ ep = epi->ep; mutex_lock(&ep->mtx); dispose = __ep_remove(ep, epi, true); mutex_unlock(&ep->mtx); if (dispose && ep_refcount_dec_and_test(ep)) ep_free(ep); goto again; } spin_unlock(&file->f_lock); } static int ep_alloc(struct eventpoll **pep) { struct eventpoll *ep; ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (unlikely(!ep)) return -ENOMEM; mutex_init(&ep->mtx); rwlock_init(&ep->lock); init_waitqueue_head(&ep->wq); init_waitqueue_head(&ep->poll_wait); INIT_LIST_HEAD(&ep->rdllist); ep->rbr = RB_ROOT_CACHED; ep->ovflist = EP_UNACTIVE_PTR; ep->user = get_current_user(); refcount_set(&ep->refcount, 1); *pep = ep; return 0; } /* * Search the file inside the eventpoll tree. The RB tree operations * are protected by the "mtx" mutex, and ep_find() must be called with * "mtx" held. 
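 *
 * Illustrative example (not from the original source): ep_show_fdinfo() above
 * emits one "tfd:" line per watched descriptor into /proc/<pid>/fdinfo/<epfd>.
 * For an epoll instance watching a single fd registered for EPOLLIN, the
 * relevant line looks roughly like (values made up for illustration):
 *
 *	tfd:        7 events:        1 data:                7  pos:0 ino:a1 sdev:7
 *
 * where "events" is the registered event mask and "data" is the epoll_data
 * value supplied at EPOLL_CTL_ADD/MOD time.
 *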
*/ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) { int kcmp; struct rb_node *rbp; struct epitem *epi, *epir = NULL; struct epoll_filefd ffd; ep_set_ffd(&ffd, file, fd); for (rbp = ep->rbr.rb_root.rb_node; rbp; ) { epi = rb_entry(rbp, struct epitem, rbn); kcmp = ep_cmp_ffd(&ffd, &epi->ffd); if (kcmp > 0) rbp = rbp->rb_right; else if (kcmp < 0) rbp = rbp->rb_left; else { epir = epi; break; } } return epir; } #ifdef CONFIG_KCMP static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff) { struct rb_node *rbp; struct epitem *epi; for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (epi->ffd.fd == tfd) { if (toff == 0) return epi; else toff--; } cond_resched(); } return NULL; } struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff) { struct file *file_raw; struct eventpoll *ep; struct epitem *epi; if (!is_file_epoll(file)) return ERR_PTR(-EINVAL); ep = file->private_data; mutex_lock(&ep->mtx); epi = ep_find_tfd(ep, tfd, toff); if (epi) file_raw = epi->ffd.file; else file_raw = ERR_PTR(-ENOENT); mutex_unlock(&ep->mtx); return file_raw; } #endif /* CONFIG_KCMP */ /* * Adds a new entry to the tail of the list in a lockless way, i.e. * multiple CPUs are allowed to call this function concurrently. * * Beware: it is necessary to prevent any other modifications of the * existing list until all changes are completed, in other words * concurrent list_add_tail_lockless() calls should be protected * with a read lock, where write lock acts as a barrier which * makes sure all list_add_tail_lockless() calls are fully * completed. * * Also an element can be locklessly added to the list only in one * direction i.e. either to the tail or to the head, otherwise * concurrent access will corrupt the list. * * Return: %false if element has been already added to the list, %true * otherwise. */ static inline bool list_add_tail_lockless(struct list_head *new, struct list_head *head) { struct list_head *prev; /* * This is simple 'new->next = head' operation, but cmpxchg() * is used in order to detect that same element has been just * added to the list from another CPU: the winner observes * new->next == new. */ if (!try_cmpxchg(&new->next, &new, head)) return false; /* * Initially ->next of a new element must be updated with the head * (we are inserting to the tail) and only then pointers are atomically * exchanged. XCHG guarantees memory ordering, thus ->next should be * updated before pointers are actually swapped and pointers are * swapped before prev->next is updated. */ prev = xchg(&head->prev, new); /* * It is safe to modify prev->next and new->prev, because a new element * is added only to the tail and new->next is updated before XCHG. */ prev->next = new; new->prev = prev; return true; } /* * Chains a new epi entry to the tail of the ep->ovflist in a lockless way, * i.e. multiple CPUs are allowed to call this function concurrently. * * Return: %false if epi element has been already chained, %true otherwise. 
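 *
 * Illustrative sketch (not from the original source) for ep_find_tfd() and
 * get_epoll_tfile_raw_ptr() above: with CONFIG_KCMP, user space can query
 * whether a descriptor is registered in an epoll instance via kcmp(2) with
 * KCMP_EPOLL_TFD and struct kcmp_epoll_slot from <linux/kcmp.h>. Here "epfd"
 * and "tfd" are hypothetical descriptors of the calling process:
 *
 *	struct kcmp_epoll_slot slot = { .efd = epfd, .tfd = tfd, .toff = 0 };
 *	long ret = syscall(SYS_kcmp, getpid(), getpid(), KCMP_EPOLL_TFD,
 *			   tfd, (unsigned long)&slot);
 *
 * A zero return means the slot refers to the same open file as "tfd"; the
 * call fails (for instance with ENOENT) when nothing matching is registered.
 *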
*/ static inline bool chain_epi_lockless(struct epitem *epi) { struct eventpoll *ep = epi->ep; /* Fast preliminary check */ if (epi->next != EP_UNACTIVE_PTR) return false; /* Check that the same epi has not been just chained from another CPU */ if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR) return false; /* Atomically exchange tail */ epi->next = xchg(&ep->ovflist, epi); return true; } /* * This is the callback that is passed to the wait queue wakeup * mechanism. It is called by the stored file descriptors when they * have events to report. * * This callback takes a read lock in order not to contend with concurrent * events from another file descriptor, thus all modifications to ->rdllist * or ->ovflist are lockless. Read lock is paired with the write lock from * ep_start/done_scan(), which stops all list modifications and guarantees * that lists state is seen correctly. * * Another thing worth to mention is that ep_poll_callback() can be called * concurrently for the same @epi from different CPUs if poll table was inited * with several wait queues entries. Plural wakeup from different CPUs of a * single wait queue is serialized by wq.lock, but the case when multiple wait * queues are used should be detected accordingly. This is detected using * cmpxchg() operation. */ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { int pwake = 0; struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; __poll_t pollflags = key_to_poll(key); unsigned long flags; int ewake = 0; read_lock_irqsave(&ep->lock, flags); ep_set_busy_poll_napi_id(epi); /* * If the event mask does not contain any poll(2) event, we consider the * descriptor to be disabled. This condition is likely the effect of the * EPOLLONESHOT bit that disables the descriptor when an event is received, * until the next EPOLL_CTL_MOD will be issued. */ if (!(epi->event.events & ~EP_PRIVATE_BITS)) goto out_unlock; /* * Check the events coming with the callback. At this stage, not * every device reports the events in the "key" parameter of the * callback. We need to be able to handle both cases here, hence the * test for "key" != NULL before the event match test. */ if (pollflags && !(pollflags & epi->event.events)) goto out_unlock; /* * If we are transferring events to userspace, we can hold no locks * (because we're accessing user memory, and because of linux f_op->poll() * semantics). All the events that happen during that period of time are * chained in ep->ovflist and requeued later on. */ if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { if (chain_epi_lockless(epi)) ep_pm_stay_awake_rcu(epi); } else if (!ep_is_linked(epi)) { /* In the usual case, add event to ready list. */ if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) ep_pm_stay_awake_rcu(epi); } /* * Wake up ( if active ) both the eventpoll wait list and the ->poll() * wait list. 
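 *
 * Illustrative sketch (not from the original source) for the EP_PRIVATE_BITS
 * check above: once an EPOLLONESHOT event has been delivered, ep_send_events()
 * (further down) strips the mask down to EP_PRIVATE_BITS, so later wakeups are
 * ignored here until user space re-arms the item, typically:
 *
 *	struct epoll_event ev = { .events = EPOLLIN | EPOLLONESHOT,
 *				  .data.fd = sock_fd };
 *	epoll_ctl(epfd, EPOLL_CTL_MOD, sock_fd, &ev);
 *
 * ("epfd" and "sock_fd" are hypothetical descriptors.)
 *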
*/ if (waitqueue_active(&ep->wq)) { if ((epi->event.events & EPOLLEXCLUSIVE) && !(pollflags & POLLFREE)) { switch (pollflags & EPOLLINOUT_BITS) { case EPOLLIN: if (epi->event.events & EPOLLIN) ewake = 1; break; case EPOLLOUT: if (epi->event.events & EPOLLOUT) ewake = 1; break; case 0: ewake = 1; break; } } if (sync) wake_up_sync(&ep->wq); else wake_up(&ep->wq); } if (waitqueue_active(&ep->poll_wait)) pwake++; out_unlock: read_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE); if (!(epi->event.events & EPOLLEXCLUSIVE)) ewake = 1; if (pollflags & POLLFREE) { /* * If we race with ep_remove_wait_queue() it can miss * ->whead = NULL and do another remove_wait_queue() after * us, so we can't use __remove_wait_queue(). */ list_del_init(&wait->entry); /* * ->whead != NULL protects us from the race with * ep_clear_and_put() or ep_remove(), ep_remove_wait_queue() * takes whead->lock held by the caller. Once we nullify it, * nothing protects ep/epi or even wait. */ smp_store_release(&ep_pwq_from_wait(wait)->whead, NULL); } return ewake; } /* * This is the callback that is used to add our wait queue to the * target file wakeup lists. */ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, poll_table *pt) { struct ep_pqueue *epq = container_of(pt, struct ep_pqueue, pt); struct epitem *epi = epq->epi; struct eppoll_entry *pwq; if (unlikely(!epi)) // an earlier allocation has failed return; pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL); if (unlikely(!pwq)) { epq->epi = NULL; return; } init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); pwq->whead = whead; pwq->base = epi; if (epi->event.events & EPOLLEXCLUSIVE) add_wait_queue_exclusive(whead, &pwq->wait); else add_wait_queue(whead, &pwq->wait); pwq->next = epi->pwqlist; epi->pwqlist = pwq; } static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) { int kcmp; struct rb_node **p = &ep->rbr.rb_root.rb_node, *parent = NULL; struct epitem *epic; bool leftmost = true; while (*p) { parent = *p; epic = rb_entry(parent, struct epitem, rbn); kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd); if (kcmp > 0) { p = &parent->rb_right; leftmost = false; } else p = &parent->rb_left; } rb_link_node(&epi->rbn, parent, p); rb_insert_color_cached(&epi->rbn, &ep->rbr, leftmost); } #define PATH_ARR_SIZE 5 /* * These are the number paths of length 1 to 5, that we are allowing to emanate * from a single file of interest. For example, we allow 1000 paths of length * 1, to emanate from each file of interest. This essentially represents the * potential wakeup paths, which need to be limited in order to avoid massive * uncontrolled wakeup storms. The common use case should be a single ep which * is connected to n file sources. In this case each file source has 1 path * of length 1. Thus, the numbers below should be more than sufficient. These * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify * and delete can't add additional paths. Protected by the epnested_mutex. 
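 *
 * Illustrative sketch (not from the original source) for the EPOLLEXCLUSIVE
 * handling in ep_poll_callback() above: several epoll instances, typically one
 * per worker, can watch the same listening socket with EPOLLEXCLUSIVE so that
 * a single incoming connection does not wake every waiter:
 *
 *	struct epoll_event ev = { .events = EPOLLIN | EPOLLEXCLUSIVE,
 *				  .data.fd = listen_fd };
 *	epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev);
 *
 * ("epfd" and "listen_fd" are hypothetical descriptors. Note that
 * do_epoll_ctl() further down rejects EPOLLEXCLUSIVE for EPOLL_CTL_MOD and
 * when the target is itself an epoll file.)
 *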
*/ static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 }; static int path_count[PATH_ARR_SIZE]; static int path_count_inc(int nests) { /* Allow an arbitrary number of depth 1 paths */ if (nests == 0) return 0; if (++path_count[nests] > path_limits[nests]) return -1; return 0; } static void path_count_init(void) { int i; for (i = 0; i < PATH_ARR_SIZE; i++) path_count[i] = 0; } static int reverse_path_check_proc(struct hlist_head *refs, int depth) { int error = 0; struct epitem *epi; if (depth > EP_MAX_NESTS) /* too deep nesting */ return -1; /* CTL_DEL can remove links here, but that can't increase our count */ hlist_for_each_entry_rcu(epi, refs, fllink) { struct hlist_head *refs = &epi->ep->refs; if (hlist_empty(refs)) error = path_count_inc(depth); else error = reverse_path_check_proc(refs, depth + 1); if (error != 0) break; } return error; } /** * reverse_path_check - The tfile_check_list is list of epitem_head, which have * links that are proposed to be newly added. We need to * make sure that those added links don't add too many * paths such that we will spend all our time waking up * eventpoll objects. * * Return: %zero if the proposed links don't create too many paths, * %-1 otherwise. */ static int reverse_path_check(void) { struct epitems_head *p; for (p = tfile_check_list; p != EP_UNACTIVE_PTR; p = p->next) { int error; path_count_init(); rcu_read_lock(); error = reverse_path_check_proc(&p->epitems, 0); rcu_read_unlock(); if (error) return error; } return 0; } static int ep_create_wakeup_source(struct epitem *epi) { struct name_snapshot n; struct wakeup_source *ws; if (!epi->ep->ws) { epi->ep->ws = wakeup_source_register(NULL, "eventpoll"); if (!epi->ep->ws) return -ENOMEM; } take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry); ws = wakeup_source_register(NULL, n.name.name); release_dentry_name_snapshot(&n); if (!ws) return -ENOMEM; rcu_assign_pointer(epi->ws, ws); return 0; } /* rare code path, only used when EPOLL_CTL_MOD removes a wakeup source */ static noinline void ep_destroy_wakeup_source(struct epitem *epi) { struct wakeup_source *ws = ep_wakeup_source(epi); RCU_INIT_POINTER(epi->ws, NULL); /* * wait for ep_pm_stay_awake_rcu to finish, synchronize_rcu is * used internally by wakeup_source_remove, too (called by * wakeup_source_unregister), so we cannot use call_rcu */ synchronize_rcu(); wakeup_source_unregister(ws); } static int attach_epitem(struct file *file, struct epitem *epi) { struct epitems_head *to_free = NULL; struct hlist_head *head = NULL; struct eventpoll *ep = NULL; if (is_file_epoll(file)) ep = file->private_data; if (ep) { head = &ep->refs; } else if (!READ_ONCE(file->f_ep)) { allocate: to_free = kmem_cache_zalloc(ephead_cache, GFP_KERNEL); if (!to_free) return -ENOMEM; head = &to_free->epitems; } spin_lock(&file->f_lock); if (!file->f_ep) { if (unlikely(!head)) { spin_unlock(&file->f_lock); goto allocate; } /* See eventpoll_release() for details. */ WRITE_ONCE(file->f_ep, head); to_free = NULL; } hlist_add_head_rcu(&epi->fllink, file->f_ep); spin_unlock(&file->f_lock); free_ephead(to_free); return 0; } /* * Must be called with "mtx" held. 
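 *
 * Illustrative sketch (not from the original source) for
 * ep_create_wakeup_source() above: a watch added with EPOLLWAKEUP holds a
 * wakeup source while its event is queued and while user space processes it,
 * preventing autosuspend in between. The flag requires CAP_BLOCK_SUSPEND and
 * is otherwise silently cleared by ep_take_care_of_epollwakeup() further down:
 *
 *	struct epoll_event ev = { .events = EPOLLIN | EPOLLWAKEUP,
 *				  .data.fd = dev_fd };
 *	epoll_ctl(epfd, EPOLL_CTL_ADD, dev_fd, &ev);
 *
 * ("epfd" and "dev_fd" are hypothetical descriptors.)
 *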
*/ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, struct file *tfile, int fd, int full_check) { int error, pwake = 0; __poll_t revents; struct epitem *epi; struct ep_pqueue epq; struct eventpoll *tep = NULL; if (is_file_epoll(tfile)) tep = tfile->private_data; lockdep_assert_irqs_enabled(); if (unlikely(percpu_counter_compare(&ep->user->epoll_watches, max_user_watches) >= 0)) return -ENOSPC; percpu_counter_inc(&ep->user->epoll_watches); if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL))) { percpu_counter_dec(&ep->user->epoll_watches); return -ENOMEM; } /* Item initialization follow here ... */ INIT_LIST_HEAD(&epi->rdllink); epi->ep = ep; ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; epi->next = EP_UNACTIVE_PTR; if (tep) mutex_lock_nested(&tep->mtx, 1); /* Add the current item to the list of active epoll hook for this file */ if (unlikely(attach_epitem(tfile, epi) < 0)) { if (tep) mutex_unlock(&tep->mtx); kmem_cache_free(epi_cache, epi); percpu_counter_dec(&ep->user->epoll_watches); return -ENOMEM; } if (full_check && !tep) list_file(tfile); /* * Add the current item to the RB tree. All RB tree operations are * protected by "mtx", and ep_insert() is called with "mtx" held. */ ep_rbtree_insert(ep, epi); if (tep) mutex_unlock(&tep->mtx); /* * ep_remove_safe() calls in the later error paths can't lead to * ep_free() as the ep file itself still holds an ep reference. */ ep_get(ep); /* now check if we've created too many backpaths */ if (unlikely(full_check && reverse_path_check())) { ep_remove_safe(ep, epi); return -EINVAL; } if (epi->event.events & EPOLLWAKEUP) { error = ep_create_wakeup_source(epi); if (error) { ep_remove_safe(ep, epi); return error; } } /* Initialize the poll table using the queue callback */ epq.epi = epi; init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); /* * Attach the item to the poll hooks and get current event bits. * We can safely use the file* here because its usage count has * been increased by the caller of this function. Note that after * this operation completes, the poll callback can start hitting * the new item. */ revents = ep_item_poll(epi, &epq.pt, 1); /* * We have to check if something went wrong during the poll wait queue * install process. Namely an allocation for a wait queue failed due * high memory pressure. */ if (unlikely(!epq.epi)) { ep_remove_safe(ep, epi); return -ENOMEM; } /* We have to drop the new item inside our item list to keep track of it */ write_lock_irq(&ep->lock); /* record NAPI ID of new item if present */ ep_set_busy_poll_napi_id(epi); /* If the file is already "ready" we drop it inside the ready list */ if (revents && !ep_is_linked(epi)) { list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) wake_up(&ep->wq); if (waitqueue_active(&ep->poll_wait)) pwake++; } write_unlock_irq(&ep->lock); /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(ep, NULL, 0); return 0; } /* * Modify the interest event mask by dropping an event if the new mask * has a match in the current file status. Must be called with "mtx" held. */ static int ep_modify(struct eventpoll *ep, struct epitem *epi, const struct epoll_event *event) { int pwake = 0; poll_table pt; lockdep_assert_irqs_enabled(); init_poll_funcptr(&pt, NULL); /* * Set the new event interest mask before calling f_op->poll(); * otherwise we might miss an event that happens between the * f_op->poll() call and the new event set registering. 
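 *
 * Usage note (not from the original source) for the epoll_watches check in
 * ep_insert() above: the per-user number of watches is capped by the sysctl
 * /proc/sys/fs/epoll/max_user_watches registered earlier in this file; once
 * the cap is reached, further additions fail:
 *
 *	struct epoll_event ev = { .events = EPOLLIN, .data.fd = fd };
 *	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0 && errno == ENOSPC)
 *		(the calling user has exhausted max_user_watches)
 *
 * ("epfd" and "fd" are hypothetical descriptors.)
 *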
*/ epi->event.events = event->events; /* need barrier below */ epi->event.data = event->data; /* protected by mtx */ if (epi->event.events & EPOLLWAKEUP) { if (!ep_has_wakeup_source(epi)) ep_create_wakeup_source(epi); } else if (ep_has_wakeup_source(epi)) { ep_destroy_wakeup_source(epi); } /* * The following barrier has two effects: * * 1) Flush epi changes above to other CPUs. This ensures * we do not miss events from ep_poll_callback if an * event occurs immediately after we call f_op->poll(). * We need this because we did not take ep->lock while * changing epi above (but ep_poll_callback does take * ep->lock). * * 2) We also need to ensure we do not miss _past_ events * when calling f_op->poll(). This barrier also * pairs with the barrier in wq_has_sleeper (see * comments for wq_has_sleeper). * * This barrier will now guarantee ep_poll_callback or f_op->poll * (or both) will notice the readiness of an item. */ smp_mb(); /* * Get current event bits. We can safely use the file* here because * its usage count has been increased by the caller of this function. * If the item is "hot" and it is not registered inside the ready * list, push it inside. */ if (ep_item_poll(epi, &pt, 1)) { write_lock_irq(&ep->lock); if (!ep_is_linked(epi)) { list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) wake_up(&ep->wq); if (waitqueue_active(&ep->poll_wait)) pwake++; } write_unlock_irq(&ep->lock); } /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(ep, NULL, 0); return 0; } static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, int maxevents) { struct epitem *epi, *tmp; LIST_HEAD(txlist); poll_table pt; int res = 0; /* * Always short-circuit for fatal signals to allow threads to make a * timely exit without the chance of finding more events available and * fetching repeatedly. */ if (fatal_signal_pending(current)) return -EINTR; init_poll_funcptr(&pt, NULL); mutex_lock(&ep->mtx); ep_start_scan(ep, &txlist); /* * We can loop without lock because we are passed a task private list. * Items cannot vanish during the loop we are holding ep->mtx. */ list_for_each_entry_safe(epi, tmp, &txlist, rdllink) { struct wakeup_source *ws; __poll_t revents; if (res >= maxevents) break; /* * Activate ep->ws before deactivating epi->ws to prevent * triggering auto-suspend here (in case we reactive epi->ws * below). * * This could be rearranged to delay the deactivation of epi->ws * instead, but then epi->ws would temporarily be out of sync * with ep_is_linked(). */ ws = ep_wakeup_source(epi); if (ws) { if (ws->active) __pm_stay_awake(ep->ws); __pm_relax(ws); } list_del_init(&epi->rdllink); /* * If the event mask intersect the caller-requested one, * deliver the event to userspace. Again, we are holding ep->mtx, * so no operations coming from userspace can change the item. */ revents = ep_item_poll(epi, &pt, 1); if (!revents) continue; events = epoll_put_uevent(revents, epi->event.data, events); if (!events) { list_add(&epi->rdllink, &txlist); ep_pm_stay_awake(epi); if (!res) res = -EFAULT; break; } res++; if (epi->event.events & EPOLLONESHOT) epi->event.events &= EP_PRIVATE_BITS; else if (!(epi->event.events & EPOLLET)) { /* * If this file has been added with Level * Trigger mode, we need to insert back inside * the ready list, so that the next call to * epoll_wait() will check again the events * availability. At this point, no one can insert * into ep->rdllist besides us. 
The epoll_ctl() * callers are locked out by * ep_send_events() holding "mtx" and the * poll callback will queue them in ep->ovflist. */ list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); } } ep_done_scan(ep, &txlist); mutex_unlock(&ep->mtx); return res; } static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms) { struct timespec64 now; if (ms < 0) return NULL; if (!ms) { to->tv_sec = 0; to->tv_nsec = 0; return to; } to->tv_sec = ms / MSEC_PER_SEC; to->tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC); ktime_get_ts64(&now); *to = timespec64_add_safe(now, *to); return to; } /* * autoremove_wake_function, but remove even on failure to wake up, because we * know that default_wake_function/ttwu will only fail if the thread is already * woken, and in that case the ep_poll loop will remove the entry anyways, not * try to reuse it. */ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned int mode, int sync, void *key) { int ret = default_wake_function(wq_entry, mode, sync, key); /* * Pairs with list_empty_careful in ep_poll, and ensures future loop * iterations see the cause of this wakeup. */ list_del_init_careful(&wq_entry->entry); return ret; } static int ep_try_send_events(struct eventpoll *ep, struct epoll_event __user *events, int maxevents) { int res; /* * Try to transfer events to user space. In case we get 0 events and * there's still timeout left over, we go trying again in search of * more luck. */ res = ep_send_events(ep, events, maxevents); if (res > 0) ep_suspend_napi_irqs(ep); return res; } static int ep_schedule_timeout(ktime_t *to) { if (to) return ktime_after(*to, ktime_get()); else return 1; } /** * ep_poll - Retrieves ready events, and delivers them to the caller-supplied * event buffer. * * @ep: Pointer to the eventpoll context. * @events: Pointer to the userspace buffer where the ready events should be * stored. * @maxevents: Size (in terms of number of events) of the caller event buffer. * @timeout: Maximum timeout for the ready events fetch operation, in * timespec. If the timeout is zero, the function will not block, * while if the @timeout ptr is NULL, the function will block * until at least one event has been retrieved (or an error * occurred). * * Return: the number of ready events which have been fetched, or an * error code, in case of error. */ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, struct timespec64 *timeout) { int res, eavail, timed_out = 0; u64 slack = 0; wait_queue_entry_t wait; ktime_t expires, *to = NULL; lockdep_assert_irqs_enabled(); if (timeout && (timeout->tv_sec | timeout->tv_nsec)) { slack = select_estimate_accuracy(timeout); to = &expires; *to = timespec64_to_ktime(*timeout); } else if (timeout) { /* * Avoid the unnecessary trip to the wait queue loop, if the * caller specified a non blocking operation. */ timed_out = 1; } /* * This call is racy: We may or may not see events that are being added * to the ready list under the lock (e.g., in IRQ callbacks). For cases * with a non-zero timeout, this thread will check the ready list under * lock and will add to the wait queue. For cases with a zero * timeout, the user by definition should not care and will have to * recheck again. 
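 *
 * Illustrative sketch (not from the original source) for the level-trigger
 * re-insertion in ep_send_events() above: in the default level-triggered mode
 * a descriptor that still has unread data is put back on the ready list and
 * reported again by the next epoll_wait(); with EPOLLET it is reported once
 * per readiness transition, so the caller is expected to drain it:
 *
 *	struct epoll_event ev = { .events = EPOLLIN | EPOLLET,
 *				  .data.fd = sock_fd };
 *	epoll_ctl(epfd, EPOLL_CTL_ADD, sock_fd, &ev);
 *	(on EPOLLIN: read() in a loop until it fails with EAGAIN)
 *
 * ("epfd" and "sock_fd" are hypothetical, non-blocking descriptors.)
 *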
*/ eavail = ep_events_available(ep); while (1) { if (eavail) { res = ep_try_send_events(ep, events, maxevents); if (res) return res; } if (timed_out) return 0; eavail = ep_busy_loop(ep); if (eavail) continue; if (signal_pending(current)) return -EINTR; /* * Internally init_wait() uses autoremove_wake_function(), * thus wait entry is removed from the wait queue on each * wakeup. Why it is important? In case of several waiters * each new wakeup will hit the next waiter, giving it the * chance to harvest new event. Otherwise wakeup can be * lost. This is also good performance-wise, because on * normal wakeup path no need to call __remove_wait_queue() * explicitly, thus ep->lock is not taken, which halts the * event delivery. * * In fact, we now use an even more aggressive function that * unconditionally removes, because we don't reuse the wait * entry between loop iterations. This lets us also avoid the * performance issue if a process is killed, causing all of its * threads to wake up without being removed normally. */ init_wait(&wait); wait.func = ep_autoremove_wake_function; write_lock_irq(&ep->lock); /* * Barrierless variant, waitqueue_active() is called under * the same lock on wakeup ep_poll_callback() side, so it * is safe to avoid an explicit barrier. */ __set_current_state(TASK_INTERRUPTIBLE); /* * Do the final check under the lock. ep_start/done_scan() * plays with two lists (->rdllist and ->ovflist) and there * is always a race when both lists are empty for short * period of time although events are pending, so lock is * important. */ eavail = ep_events_available(ep); if (!eavail) __add_wait_queue_exclusive(&ep->wq, &wait); write_unlock_irq(&ep->lock); if (!eavail) timed_out = !ep_schedule_timeout(to) || !schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* * We were woken up, thus go and try to harvest some events. * If timed out and still on the wait queue, recheck eavail * carefully under lock, below. */ eavail = 1; if (!list_empty_careful(&wait.entry)) { write_lock_irq(&ep->lock); /* * If the thread timed out and is not on the wait queue, * it means that the thread was woken up after its * timeout expired before it could reacquire the lock. * Thus, when wait.entry is empty, it needs to harvest * events. */ if (timed_out) eavail = list_empty(&wait.entry); __remove_wait_queue(&ep->wq, &wait); write_unlock_irq(&ep->lock); } } } /** * ep_loop_check_proc - verify that adding an epoll file inside another * epoll structure does not violate the constraints, in * terms of closed loops, or too deep chains (which can * result in excessive stack usage). * * @ep: the &struct eventpoll to be currently checked. * @depth: Current depth of the path being checked. * * Return: %zero if adding the epoll @file inside current epoll * structure @ep does not violate the constraints, or %-1 otherwise. 
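 *
 * Illustrative example (not from the original source): the cycle rejected by
 * this check can be produced from user space by nesting two epoll descriptors
 * into each other; the second insertion fails:
 *
 *	int e1 = epoll_create1(0), e2 = epoll_create1(0);
 *	struct epoll_event ev = { .events = EPOLLIN };
 *	epoll_ctl(e1, EPOLL_CTL_ADD, e2, &ev);	(succeeds)
 *	epoll_ctl(e2, EPOLL_CTL_ADD, e1, &ev);	(fails with ELOOP)
 *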
*/ static int ep_loop_check_proc(struct eventpoll *ep, int depth) { int error = 0; struct rb_node *rbp; struct epitem *epi; mutex_lock_nested(&ep->mtx, depth + 1); ep->gen = loop_check_gen; for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (unlikely(is_file_epoll(epi->ffd.file))) { struct eventpoll *ep_tovisit; ep_tovisit = epi->ffd.file->private_data; if (ep_tovisit->gen == loop_check_gen) continue; if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS) error = -1; else error = ep_loop_check_proc(ep_tovisit, depth + 1); if (error != 0) break; } else { /* * If we've reached a file that is not associated with * an ep, then we need to check if the newly added * links are going to add too many wakeup paths. We do * this by adding it to the tfile_check_list, if it's * not already there, and calling reverse_path_check() * during ep_insert(). */ list_file(epi->ffd.file); } } mutex_unlock(&ep->mtx); return error; } /** * ep_loop_check - Performs a check to verify that adding an epoll file (@to) * into another epoll file (represented by @ep) does not create * closed loops or too deep chains. * * @ep: Pointer to the epoll we are inserting into. * @to: Pointer to the epoll to be inserted. * * Return: %zero if adding the epoll @to inside the epoll @from * does not violate the constraints, or %-1 otherwise. */ static int ep_loop_check(struct eventpoll *ep, struct eventpoll *to) { inserting_into = ep; return ep_loop_check_proc(to, 0); } static void clear_tfile_check_list(void) { rcu_read_lock(); while (tfile_check_list != EP_UNACTIVE_PTR) { struct epitems_head *head = tfile_check_list; tfile_check_list = head->next; unlist_file(head); } rcu_read_unlock(); } /* * Open an eventpoll file descriptor. */ static int do_epoll_create(int flags) { int error, fd; struct eventpoll *ep = NULL; struct file *file; /* Check the EPOLL_* constant for consistency. */ BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); if (flags & ~EPOLL_CLOEXEC) return -EINVAL; /* * Create the internal data structure ("struct eventpoll"). */ error = ep_alloc(&ep); if (error < 0) return error; /* * Creates all the items needed to setup an eventpoll file. That is, * a file structure and a free file descriptor. 
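 *
 * Usage note (not from the original source): the legacy epoll_create(size)
 * entry point further down only checks that size > 0 and otherwise ignores
 * it, while epoll_create1() takes flags (EPOLL_CLOEXEC is the only one
 * accepted here):
 *
 *	int epfd = epoll_create1(EPOLL_CLOEXEC);	(preferred)
 *	int old  = epoll_create(42);			(size ignored, must be > 0)
 *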
*/ fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC)); if (fd < 0) { error = fd; goto out_free_ep; } file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep, O_RDWR | (flags & O_CLOEXEC)); if (IS_ERR(file)) { error = PTR_ERR(file); goto out_free_fd; } ep->file = file; fd_install(fd, file); return fd; out_free_fd: put_unused_fd(fd); out_free_ep: ep_clear_and_put(ep); return error; } SYSCALL_DEFINE1(epoll_create1, int, flags) { return do_epoll_create(flags); } SYSCALL_DEFINE1(epoll_create, int, size) { if (size <= 0) return -EINVAL; return do_epoll_create(0); } #ifdef CONFIG_PM_SLEEP static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev) { if ((epev->events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) epev->events &= ~EPOLLWAKEUP; } #else static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev) { epev->events &= ~EPOLLWAKEUP; } #endif static inline int epoll_mutex_lock(struct mutex *mutex, int depth, bool nonblock) { if (!nonblock) { mutex_lock_nested(mutex, depth); return 0; } if (mutex_trylock(mutex)) return 0; return -EAGAIN; } int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, bool nonblock) { int error; int full_check = 0; struct eventpoll *ep; struct epitem *epi; struct eventpoll *tep = NULL; CLASS(fd, f)(epfd); if (fd_empty(f)) return -EBADF; /* Get the "struct file *" for the target file */ CLASS(fd, tf)(fd); if (fd_empty(tf)) return -EBADF; /* The target file descriptor must support poll */ if (!file_can_poll(fd_file(tf))) return -EPERM; /* Check if EPOLLWAKEUP is allowed */ if (ep_op_has_event(op)) ep_take_care_of_epollwakeup(epds); /* * We have to check that the file structure underneath the file descriptor * the user passed to us _is_ an eventpoll file. And also we do not permit * adding an epoll file descriptor inside itself. */ error = -EINVAL; if (fd_file(f) == fd_file(tf) || !is_file_epoll(fd_file(f))) goto error_tgt_fput; /* * epoll adds to the wakeup queue at EPOLL_CTL_ADD time only, * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation. * Also, we do not currently supported nested exclusive wakeups. */ if (ep_op_has_event(op) && (epds->events & EPOLLEXCLUSIVE)) { if (op == EPOLL_CTL_MOD) goto error_tgt_fput; if (op == EPOLL_CTL_ADD && (is_file_epoll(fd_file(tf)) || (epds->events & ~EPOLLEXCLUSIVE_OK_BITS))) goto error_tgt_fput; } /* * At this point it is safe to assume that the "private_data" contains * our own data structure. */ ep = fd_file(f)->private_data; /* * When we insert an epoll file descriptor inside another epoll file * descriptor, there is the chance of creating closed loops, which are * better be handled here, than in more critical paths. While we are * checking for loops we also determine the list of files reachable * and hang them on the tfile_check_list, so we can check that we * haven't created too many possible wakeup paths. * * We do not need to take the global 'epumutex' on EPOLL_CTL_ADD when * the epoll file descriptor is attaching directly to a wakeup source, * unless the epoll file descriptor is nested. The purpose of taking the * 'epnested_mutex' on add is to prevent complex toplogies such as loops and * deep wakeup paths from forming in parallel through multiple * EPOLL_CTL_ADD operations. 
*/ error = epoll_mutex_lock(&ep->mtx, 0, nonblock); if (error) goto error_tgt_fput; if (op == EPOLL_CTL_ADD) { if (READ_ONCE(fd_file(f)->f_ep) || ep->gen == loop_check_gen || is_file_epoll(fd_file(tf))) { mutex_unlock(&ep->mtx); error = epoll_mutex_lock(&epnested_mutex, 0, nonblock); if (error) goto error_tgt_fput; loop_check_gen++; full_check = 1; if (is_file_epoll(fd_file(tf))) { tep = fd_file(tf)->private_data; error = -ELOOP; if (ep_loop_check(ep, tep) != 0) goto error_tgt_fput; } error = epoll_mutex_lock(&ep->mtx, 0, nonblock); if (error) goto error_tgt_fput; } } /* * Try to lookup the file inside our RB tree. Since we grabbed "mtx" * above, we can be sure to be able to use the item looked up by * ep_find() till we release the mutex. */ epi = ep_find(ep, fd_file(tf), fd); error = -EINVAL; switch (op) { case EPOLL_CTL_ADD: if (!epi) { epds->events |= EPOLLERR | EPOLLHUP; error = ep_insert(ep, epds, fd_file(tf), fd, full_check); } else error = -EEXIST; break; case EPOLL_CTL_DEL: if (epi) { /* * The eventpoll itself is still alive: the refcount * can't go to zero here. */ ep_remove_safe(ep, epi); error = 0; } else { error = -ENOENT; } break; case EPOLL_CTL_MOD: if (epi) { if (!(epi->event.events & EPOLLEXCLUSIVE)) { epds->events |= EPOLLERR | EPOLLHUP; error = ep_modify(ep, epi, epds); } } else error = -ENOENT; break; } mutex_unlock(&ep->mtx); error_tgt_fput: if (full_check) { clear_tfile_check_list(); loop_check_gen++; mutex_unlock(&epnested_mutex); } return error; } /* * The following function implements the controller interface for * the eventpoll file that enables the insertion/removal/change of * file descriptors inside the interest set. */ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event) { struct epoll_event epds; if (ep_op_has_event(op) && copy_from_user(&epds, event, sizeof(struct epoll_event))) return -EFAULT; return do_epoll_ctl(epfd, op, fd, &epds, false); } static int ep_check_params(struct file *file, struct epoll_event __user *evs, int maxevents) { /* The maximum number of event must be greater than zero */ if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) return -EINVAL; /* Verify that the area passed by the user is writeable */ if (!access_ok(evs, maxevents * sizeof(struct epoll_event))) return -EFAULT; /* * We have to check that the file structure underneath the fd * the user passed to us _is_ an eventpoll file. */ if (!is_file_epoll(file)) return -EINVAL; return 0; } int epoll_sendevents(struct file *file, struct epoll_event __user *events, int maxevents) { struct eventpoll *ep; int ret; ret = ep_check_params(file, events, maxevents); if (unlikely(ret)) return ret; ep = file->private_data; /* * Racy call, but that's ok - it should get retried based on * poll readiness anyway. */ if (ep_events_available(ep)) return ep_try_send_events(ep, events, maxevents); return 0; } /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_wait(2). */ static int do_epoll_wait(int epfd, struct epoll_event __user *events, int maxevents, struct timespec64 *to) { struct eventpoll *ep; int ret; /* Get the "struct file *" for the eventpoll file */ CLASS(fd, f)(epfd); if (fd_empty(f)) return -EBADF; ret = ep_check_params(fd_file(f), events, maxevents); if (unlikely(ret)) return ret; /* * At this point it is safe to assume that the "private_data" contains * our own data structure. */ ep = fd_file(f)->private_data; /* Time to fish for events ... 
*/ return ep_poll(ep, events, maxevents, to); } SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout) { struct timespec64 to; return do_epoll_wait(epfd, events, maxevents, ep_timeout_to_timespec(&to, timeout)); } /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_pwait(2). */ static int do_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, struct timespec64 *to, const sigset_t __user *sigmask, size_t sigsetsize) { int error; /* * If the caller wants a certain signal mask to be set during the wait, * we apply it here. */ error = set_user_sigmask(sigmask, sigsetsize); if (error) return error; error = do_epoll_wait(epfd, events, maxevents, to); restore_saved_sigmask_unless(error == -EINTR); return error; } SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout, const sigset_t __user *, sigmask, size_t, sigsetsize) { struct timespec64 to; return do_epoll_pwait(epfd, events, maxevents, ep_timeout_to_timespec(&to, timeout), sigmask, sigsetsize); } SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events, int, maxevents, const struct __kernel_timespec __user *, timeout, const sigset_t __user *, sigmask, size_t, sigsetsize) { struct timespec64 ts, *to = NULL; if (timeout) { if (get_timespec64(&ts, timeout)) return -EFAULT; to = &ts; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } return do_epoll_pwait(epfd, events, maxevents, to, sigmask, sigsetsize); } #ifdef CONFIG_COMPAT static int do_compat_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, struct timespec64 *timeout, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize) { long err; /* * If the caller wants a certain signal mask to be set during the wait, * we apply it here. */ err = set_compat_user_sigmask(sigmask, sigsetsize); if (err) return err; err = do_epoll_wait(epfd, events, maxevents, timeout); restore_saved_sigmask_unless(err == -EINTR); return err; } COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { struct timespec64 to; return do_compat_epoll_pwait(epfd, events, maxevents, ep_timeout_to_timespec(&to, timeout), sigmask, sigsetsize); } COMPAT_SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events, int, maxevents, const struct __kernel_timespec __user *, timeout, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { struct timespec64 ts, *to = NULL; if (timeout) { if (get_timespec64(&ts, timeout)) return -EFAULT; to = &ts; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } return do_compat_epoll_pwait(epfd, events, maxevents, to, sigmask, sigsetsize); } #endif static int __init eventpoll_init(void) { struct sysinfo si; si_meminfo(&si); /* * Allows top 4% of lomem to be allocated for epoll watches (per user). 
*/ max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) / EP_ITEM_COST; BUG_ON(max_user_watches < 0); /* * We can have many thousands of epitems, so prevent this from * using an extra cache line on 64-bit (and smaller) CPUs */ BUILD_BUG_ON(sizeof(void *) <= 8 && sizeof(struct epitem) > 128); /* Allocates slab cache used to allocate "struct epitem" items */ epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); /* Allocates slab cache used to allocate "struct eppoll_entry" */ pwq_cache = kmem_cache_create("eventpoll_pwq", sizeof(struct eppoll_entry), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL); epoll_sysctls_init(); ephead_cache = kmem_cache_create("ep_head", sizeof(struct epitems_head), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL); return 0; } fs_initcall(eventpoll_init);
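The block above is the kernel side of epoll_create1(2)/epoll_create(2), epoll_ctl(2) and epoll_wait(2)/epoll_pwait(2). A minimal user-space sketch of how those entry points are exercised (the watched fd and the 1-second timeout are illustrative):

/* User-space sketch: epoll_create1() -> do_epoll_create(),
 * epoll_ctl() -> do_epoll_ctl(), epoll_wait() -> do_epoll_wait()/ep_poll().
 * Error handling is abbreviated.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>

int main(void)
{
	struct epoll_event ev, events[8];
	int epfd, nready, i;

	epfd = epoll_create1(EPOLL_CLOEXEC);
	if (epfd < 0) {
		perror("epoll_create1");
		return 1;
	}

	ev.events = EPOLLIN;		/* EPOLLERR|EPOLLHUP are ORed in by ep_insert() */
	ev.data.fd = STDIN_FILENO;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, STDIN_FILENO, &ev) < 0) {
		perror("epoll_ctl");
		return 1;
	}

	nready = epoll_wait(epfd, events, 8, 1000);	/* ep_poll() with a 1s timeout */
	for (i = 0; i < nready; i++)
		printf("fd %d ready, events 0x%x\n",
		       events[i].data.fd, events[i].events);

	close(epfd);
	return 0;
}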
11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 /* SPDX-License-Identifier: GPL-2.0-only */ /* * linux/fs/pnode.h * * (C) Copyright IBM Corporation 2005. */ #ifndef _LINUX_PNODE_H #define _LINUX_PNODE_H #include <linux/list.h> #include "mount.h" #define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED) #define IS_MNT_SLAVE(m) ((m)->mnt_master) #define IS_MNT_NEW(m) (!(m)->mnt_ns) #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED) #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE) #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) #define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED) #define CL_EXPIRE 0x01 #define CL_SLAVE 0x02 #define CL_COPY_UNBINDABLE 0x04 #define CL_MAKE_SHARED 0x08 #define CL_PRIVATE 0x10 #define CL_SHARED_TO_SLAVE 0x20 #define CL_COPY_MNT_NS_FILE 0x40 static inline void set_mnt_shared(struct mount *mnt) { mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK; mnt->mnt.mnt_flags |= MNT_SHARED; } void change_mnt_propagation(struct mount *, int); int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, struct hlist_head *); int propagate_umount(struct list_head *); int propagate_mount_busy(struct mount *, int); void propagate_mount_unlock(struct mount *); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); int mnt_get_count(struct mount *mnt); void mnt_set_mountpoint(struct mount *, struct mountpoint *, struct mount *); void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt); struct mount *copy_tree(struct mount *, struct dentry *, int); bool is_path_reachable(struct mount *, struct dentry *, const struct path *root); int count_mounts(struct mnt_namespace *ns, struct mount *mnt); bool propagation_would_overmount(const struct mount *from, const struct mount *to, const struct mountpoint *mp); #endif /* _LINUX_PNODE_H */
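pnode.h above is the kernel-internal side of shared-subtree propagation; the states it tracks (MNT_SHARED, slave, unbindable) are driven from user space with mount(2) propagation-type changes. A hedged user-space sketch, with an illustrative mount point and only perror() for error handling:

/* Sketch: changing the propagation type of an existing mount point.
 * Requires CAP_SYS_ADMIN (or an owned mount namespace); "/mnt" is illustrative.
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Mark the mount as a shared subtree (MNT_SHARED in the header above). */
	if (mount(NULL, "/mnt", NULL, MS_SHARED, NULL) < 0)
		perror("MS_SHARED");

	/* Or turn it and everything below it into slaves,
	 * corresponding to IS_MNT_SLAVE() above. */
	if (mount(NULL, "/mnt", NULL, MS_REC | MS_SLAVE, NULL) < 0)
		perror("MS_SLAVE");

	return 0;
}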
18 1 1 2 6 8 11 2 1 12 1 1 10 1 7 2 4 5 8 1 8 1 9 9 9 9 2 1 1 9 9 1 1 2 5 200 200 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, Intel Corporation. 
* * Author: Alexander Duyck <alexander.h.duyck@intel.com> */ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/dsfield.h> #include <net/pkt_cls.h> #include <net/tc_wrapper.h> #include <linux/tc_act/tc_skbedit.h> #include <net/tc_act/tc_skbedit.h> static struct tc_action_ops act_skbedit_ops; static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params, struct sk_buff *skb) { u16 queue_mapping = params->queue_mapping; if (params->flags & SKBEDIT_F_TXQ_SKBHASH) { u32 hash = skb_get_hash(skb); queue_mapping += hash % params->mapping_mod; } return netdev_cap_txqueue(skb->dev, queue_mapping); } TC_INDIRECT_SCOPE int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_skbedit *d = to_skbedit(a); struct tcf_skbedit_params *params; int action; tcf_lastuse_update(&d->tcf_tm); bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb); params = rcu_dereference_bh(d->params); action = READ_ONCE(d->tcf_action); if (params->flags & SKBEDIT_F_PRIORITY) skb->priority = params->priority; if (params->flags & SKBEDIT_F_INHERITDSFIELD) { int wlen = skb_network_offset(skb); switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen)) goto err; skb->priority = ipv4_get_dsfield(ip_hdr(skb)) >> 2; break; case htons(ETH_P_IPV6): wlen += sizeof(struct ipv6hdr); if (!pskb_may_pull(skb, wlen)) goto err; skb->priority = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; break; } } if (params->flags & SKBEDIT_F_QUEUE_MAPPING && skb->dev->real_num_tx_queues > params->queue_mapping) { #ifdef CONFIG_NET_EGRESS netdev_xmit_skip_txqueue(true); #endif skb_set_queue_mapping(skb, tcf_skbedit_hash(params, skb)); } if (params->flags & SKBEDIT_F_MARK) { skb->mark &= ~params->mask; skb->mark |= params->mark & params->mask; } if (params->flags & SKBEDIT_F_PTYPE) skb->pkt_type = params->ptype; return action; err: qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats)); return TC_ACT_SHOT; } static void tcf_skbedit_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { struct tcf_skbedit *d = to_skbedit(a); struct tcf_t *tm = &d->tcf_tm; tcf_action_update_stats(a, bytes, packets, drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_PARMS] = { .len = sizeof(struct tc_skbedit) }, [TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) }, [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) }, [TCA_SKBEDIT_MARK] = { .len = sizeof(u32) }, [TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) }, [TCA_SKBEDIT_MASK] = { .len = sizeof(u32) }, [TCA_SKBEDIT_FLAGS] = { .len = sizeof(u64) }, [TCA_SKBEDIT_QUEUE_MAPPING_MAX] = { .len = sizeof(u16) }, }; static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id); bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct tcf_skbedit_params *params_new; struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_skbedit *parm; struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL; u16 *queue_mapping = NULL, *ptype = NULL; u16 mapping_mod = 1; bool exists = false; int ret = 0, err; u32 index; if 
(nla == NULL) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy, NULL); if (err < 0) return err; if (tb[TCA_SKBEDIT_PARMS] == NULL) return -EINVAL; if (tb[TCA_SKBEDIT_PRIORITY] != NULL) { flags |= SKBEDIT_F_PRIORITY; priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]); } if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) { if (is_tcf_skbedit_ingress(act_flags) && !(act_flags & TCA_ACT_FLAGS_SKIP_SW)) { NL_SET_ERR_MSG_MOD(extack, "\"queue_mapping\" option on receive side is hardware only, use skip_sw"); return -EOPNOTSUPP; } flags |= SKBEDIT_F_QUEUE_MAPPING; queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]); } if (tb[TCA_SKBEDIT_PTYPE] != NULL) { ptype = nla_data(tb[TCA_SKBEDIT_PTYPE]); if (!skb_pkt_type_ok(*ptype)) return -EINVAL; flags |= SKBEDIT_F_PTYPE; } if (tb[TCA_SKBEDIT_MARK] != NULL) { flags |= SKBEDIT_F_MARK; mark = nla_data(tb[TCA_SKBEDIT_MARK]); } if (tb[TCA_SKBEDIT_MASK] != NULL) { flags |= SKBEDIT_F_MASK; mask = nla_data(tb[TCA_SKBEDIT_MASK]); } if (tb[TCA_SKBEDIT_FLAGS] != NULL) { u64 *pure_flags = nla_data(tb[TCA_SKBEDIT_FLAGS]); if (*pure_flags & SKBEDIT_F_TXQ_SKBHASH) { u16 *queue_mapping_max; if (!tb[TCA_SKBEDIT_QUEUE_MAPPING] || !tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]) { NL_SET_ERR_MSG_MOD(extack, "Missing required range of queue_mapping."); return -EINVAL; } queue_mapping_max = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]); if (*queue_mapping_max < *queue_mapping) { NL_SET_ERR_MSG_MOD(extack, "The range of queue_mapping is invalid, max < min."); return -EINVAL; } mapping_mod = *queue_mapping_max - *queue_mapping + 1; flags |= SKBEDIT_F_TXQ_SKBHASH; } if (*pure_flags & SKBEDIT_F_INHERITDSFIELD) flags |= SKBEDIT_F_INHERITDSFIELD; } parm = nla_data(tb[TCA_SKBEDIT_PARMS]); index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; if (exists && bind) return ACT_P_BOUND; if (!flags) { if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); return -EINVAL; } if (!exists) { ret = tcf_idr_create(tn, index, est, a, &act_skbedit_ops, bind, true, act_flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } d = to_skbedit(*a); ret = ACT_P_CREATED; } else { d = to_skbedit(*a); if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } } err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { err = -ENOMEM; goto put_chain; } params_new->flags = flags; if (flags & SKBEDIT_F_PRIORITY) params_new->priority = *priority; if (flags & SKBEDIT_F_QUEUE_MAPPING) { params_new->queue_mapping = *queue_mapping; params_new->mapping_mod = mapping_mod; } if (flags & SKBEDIT_F_MARK) params_new->mark = *mark; if (flags & SKBEDIT_F_PTYPE) params_new->ptype = *ptype; /* default behaviour is to use all the bits */ params_new->mask = 0xffffffff; if (flags & SKBEDIT_F_MASK) params_new->mask = *mask; spin_lock_bh(&d->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); params_new = rcu_replace_pointer(d->params, params_new, lockdep_is_held(&d->tcf_lock)); spin_unlock_bh(&d->tcf_lock); if (params_new) kfree_rcu(params_new, rcu); if (goto_ch) tcf_chain_put_by_act(goto_ch); return ret; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*a, bind); return err; } static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_skbedit *d 
= to_skbedit(a); struct tcf_skbedit_params *params; struct tc_skbedit opt = { .index = d->tcf_index, .refcnt = refcount_read(&d->tcf_refcnt) - ref, .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, }; u64 pure_flags = 0; struct tcf_t t; spin_lock_bh(&d->tcf_lock); params = rcu_dereference_protected(d->params, lockdep_is_held(&d->tcf_lock)); opt.action = d->tcf_action; if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if ((params->flags & SKBEDIT_F_PRIORITY) && nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, params->priority)) goto nla_put_failure; if ((params->flags & SKBEDIT_F_QUEUE_MAPPING) && nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, params->queue_mapping)) goto nla_put_failure; if ((params->flags & SKBEDIT_F_MARK) && nla_put_u32(skb, TCA_SKBEDIT_MARK, params->mark)) goto nla_put_failure; if ((params->flags & SKBEDIT_F_PTYPE) && nla_put_u16(skb, TCA_SKBEDIT_PTYPE, params->ptype)) goto nla_put_failure; if ((params->flags & SKBEDIT_F_MASK) && nla_put_u32(skb, TCA_SKBEDIT_MASK, params->mask)) goto nla_put_failure; if (params->flags & SKBEDIT_F_INHERITDSFIELD) pure_flags |= SKBEDIT_F_INHERITDSFIELD; if (params->flags & SKBEDIT_F_TXQ_SKBHASH) { if (nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING_MAX, params->queue_mapping + params->mapping_mod - 1)) goto nla_put_failure; pure_flags |= SKBEDIT_F_TXQ_SKBHASH; } if (pure_flags != 0 && nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags)) goto nla_put_failure; tcf_tm_dump(&t, &d->tcf_tm); if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD)) goto nla_put_failure; spin_unlock_bh(&d->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&d->tcf_lock); nlmsg_trim(skb, b); return -1; } static void tcf_skbedit_cleanup(struct tc_action *a) { struct tcf_skbedit *d = to_skbedit(a); struct tcf_skbedit_params *params; params = rcu_dereference_protected(d->params, 1); if (params) kfree_rcu(params, rcu); } static size_t tcf_skbedit_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_skbedit)) + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */ + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */ + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING_MAX */ + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */ + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */ + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */ + nla_total_size_64bit(sizeof(u64)); /* TCA_SKBEDIT_FLAGS */ } static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, bool bind, struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; if (is_tcf_skbedit_mark(act)) { entry->id = FLOW_ACTION_MARK; entry->mark = tcf_skbedit_mark(act); } else if (is_tcf_skbedit_ptype(act)) { entry->id = FLOW_ACTION_PTYPE; entry->ptype = tcf_skbedit_ptype(act); } else if (is_tcf_skbedit_priority(act)) { entry->id = FLOW_ACTION_PRIORITY; entry->priority = tcf_skbedit_priority(act); } else if (is_tcf_skbedit_tx_queue_mapping(act)) { NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"queue_mapping\" option is used on transmit side"); return -EOPNOTSUPP; } else if (is_tcf_skbedit_rx_queue_mapping(act)) { entry->id = FLOW_ACTION_RX_QUEUE_MAPPING; entry->rx_queue = tcf_skbedit_rx_queue_mapping(act); } else if (is_tcf_skbedit_inheritdsfield(act)) { NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"inheritdsfield\" option is used"); return -EOPNOTSUPP; } else { NL_SET_ERR_MSG_MOD(extack, "Unsupported skbedit option offload"); return 
-EOPNOTSUPP; } *index_inc = 1; } else { struct flow_offload_action *fl_action = entry_data; if (is_tcf_skbedit_mark(act)) fl_action->id = FLOW_ACTION_MARK; else if (is_tcf_skbedit_ptype(act)) fl_action->id = FLOW_ACTION_PTYPE; else if (is_tcf_skbedit_priority(act)) fl_action->id = FLOW_ACTION_PRIORITY; else if (is_tcf_skbedit_rx_queue_mapping(act)) fl_action->id = FLOW_ACTION_RX_QUEUE_MAPPING; else return -EOPNOTSUPP; } return 0; } static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", .id = TCA_ID_SKBEDIT, .owner = THIS_MODULE, .act = tcf_skbedit_act, .stats_update = tcf_skbedit_stats_update, .dump = tcf_skbedit_dump, .init = tcf_skbedit_init, .cleanup = tcf_skbedit_cleanup, .get_fill_size = tcf_skbedit_get_fill_size, .offload_act_setup = tcf_skbedit_offload_act_setup, .size = sizeof(struct tcf_skbedit), }; MODULE_ALIAS_NET_ACT("skbedit"); static __net_init int skbedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id); return tc_action_net_init(net, tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_skbedit_ops.net_id); } static struct pernet_operations skbedit_net_ops = { .init = skbedit_init_net, .exit_batch = skbedit_exit_net, .id = &act_skbedit_ops.net_id, .size = sizeof(struct tc_action_net), }; MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>"); MODULE_DESCRIPTION("SKB Editing"); MODULE_LICENSE("GPL"); static int __init skbedit_init_module(void) { return tcf_register_action(&act_skbedit_ops, &skbedit_net_ops); } static void __exit skbedit_cleanup_module(void) { tcf_unregister_action(&act_skbedit_ops, &skbedit_net_ops); } module_init(skbedit_init_module); module_exit(skbedit_cleanup_module);
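A small user-space sketch of the queue-selection arithmetic in tcf_skbedit_hash() above: when SKBEDIT_F_TXQ_SKBHASH is set, the skb hash is folded into the range [queue_mapping, queue_mapping_max] via mapping_mod = max - min + 1, as computed in tcf_skbedit_init(). The hash values below are made up for illustration:

/* Sketch of SKBEDIT_F_TXQ_SKBHASH queue selection (user-space rendering). */
#include <stdio.h>

static unsigned short pick_txq(unsigned short queue_mapping,
			       unsigned short queue_mapping_max,
			       unsigned int skb_hash)
{
	/* mapping_mod as derived from TCA_SKBEDIT_QUEUE_MAPPING{,_MAX} */
	unsigned short mapping_mod = queue_mapping_max - queue_mapping + 1;

	return queue_mapping + skb_hash % mapping_mod;
}

int main(void)
{
	/* e.g. a configured queue range of 4..7 */
	unsigned int hashes[] = { 0x1234, 0xdeadbeef, 0x42, 0x7fffffff };
	unsigned int i;

	for (i = 0; i < sizeof(hashes) / sizeof(hashes[0]); i++)
		printf("hash 0x%08x -> txq %u\n", hashes[i],
		       pick_txq(4, 7, hashes[i]));
	return 0;
}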
6 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 200 200 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_skbmod.c skb data modifier * * Copyright (c) 2016 Jamal Hadi Salim <jhs@mojatatu.com> */ #include <linux/module.h> #include <linux/if_arp.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <net/inet_ecn.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/tc_wrapper.h> #include <linux/tc_act/tc_skbmod.h> #include <net/tc_act/tc_skbmod.h> static struct tc_action_ops act_skbmod_ops; TC_INDIRECT_SCOPE int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_skbmod *d = to_skbmod(a); int action, max_edit_len, err; struct tcf_skbmod_params *p; u64 flags; tcf_lastuse_update(&d->tcf_tm); bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb); action = READ_ONCE(d->tcf_action); if (unlikely(action == TC_ACT_SHOT)) goto drop; max_edit_len = skb_mac_header_len(skb); p = rcu_dereference_bh(d->skbmod_p); flags = p->flags; /* tcf_skbmod_init() guarantees "flags" to be one of the following: * 1. a combination of SKBMOD_F_{DMAC,SMAC,ETYPE} * 2. SKBMOD_F_SWAPMAC * 3. SKBMOD_F_ECN * SKBMOD_F_ECN only works with IP packets; all other flags only work with Ethernet * packets. 
*/ if (flags == SKBMOD_F_ECN) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): case cpu_to_be16(ETH_P_IPV6): max_edit_len += skb_network_header_len(skb); break; default: goto out; } } else if (!skb->dev || skb->dev->type != ARPHRD_ETHER) { goto out; } err = skb_ensure_writable(skb, max_edit_len); if (unlikely(err)) /* best policy is to drop on the floor */ goto drop; if (flags & SKBMOD_F_DMAC) ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst); if (flags & SKBMOD_F_SMAC) ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src); if (flags & SKBMOD_F_ETYPE) eth_hdr(skb)->h_proto = p->eth_type; if (flags & SKBMOD_F_SWAPMAC) { u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ /*XXX: I am sure we can come up with more efficient swapping*/ ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest); ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr); } if (flags & SKBMOD_F_ECN) INET_ECN_set_ce(skb); out: return action; drop: qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); return TC_ACT_SHOT; } static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { [TCA_SKBMOD_PARMS] = { .len = sizeof(struct tc_skbmod) }, [TCA_SKBMOD_DMAC] = { .len = ETH_ALEN }, [TCA_SKBMOD_SMAC] = { .len = ETH_ALEN }, [TCA_SKBMOD_ETYPE] = { .type = NLA_U16 }, }; static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id); bool ovr = flags & TCA_ACT_FLAGS_REPLACE; bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SKBMOD_MAX + 1]; struct tcf_skbmod_params *p, *p_old; struct tcf_chain *goto_ch = NULL; struct tc_skbmod *parm; u32 lflags = 0, index; struct tcf_skbmod *d; bool exists = false; u8 *daddr = NULL; u8 *saddr = NULL; u16 eth_type = 0; int ret = 0, err; if (!nla) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_SKBMOD_MAX, nla, skbmod_policy, NULL); if (err < 0) return err; if (!tb[TCA_SKBMOD_PARMS]) return -EINVAL; if (tb[TCA_SKBMOD_DMAC]) { daddr = nla_data(tb[TCA_SKBMOD_DMAC]); lflags |= SKBMOD_F_DMAC; } if (tb[TCA_SKBMOD_SMAC]) { saddr = nla_data(tb[TCA_SKBMOD_SMAC]); lflags |= SKBMOD_F_SMAC; } if (tb[TCA_SKBMOD_ETYPE]) { eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]); lflags |= SKBMOD_F_ETYPE; } parm = nla_data(tb[TCA_SKBMOD_PARMS]); index = parm->index; if (parm->flags & SKBMOD_F_SWAPMAC) lflags = SKBMOD_F_SWAPMAC; if (parm->flags & SKBMOD_F_ECN) lflags = SKBMOD_F_ECN; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; if (exists && bind) return ACT_P_BOUND; if (!lflags) { if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); return -EINVAL; } if (!exists) { ret = tcf_idr_create(tn, index, est, a, &act_skbmod_ops, bind, true, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; } else if (!ovr) { tcf_idr_release(*a, bind); return -EEXIST; } err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; d = to_skbmod(*a); p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { err = -ENOMEM; goto put_chain; } p->flags = lflags; if (ovr) spin_lock_bh(&d->tcf_lock); /* Protected by tcf_lock if overwriting existing action. 
*/ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); p_old = rcu_dereference_protected(d->skbmod_p, 1); if (lflags & SKBMOD_F_DMAC) ether_addr_copy(p->eth_dst, daddr); if (lflags & SKBMOD_F_SMAC) ether_addr_copy(p->eth_src, saddr); if (lflags & SKBMOD_F_ETYPE) p->eth_type = htons(eth_type); rcu_assign_pointer(d->skbmod_p, p); if (ovr) spin_unlock_bh(&d->tcf_lock); if (p_old) kfree_rcu(p_old, rcu); if (goto_ch) tcf_chain_put_by_act(goto_ch); return ret; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*a, bind); return err; } static void tcf_skbmod_cleanup(struct tc_action *a) { struct tcf_skbmod *d = to_skbmod(a); struct tcf_skbmod_params *p; p = rcu_dereference_protected(d->skbmod_p, 1); if (p) kfree_rcu(p, rcu); } static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { struct tcf_skbmod *d = to_skbmod(a); unsigned char *b = skb_tail_pointer(skb); struct tcf_skbmod_params *p; struct tc_skbmod opt; struct tcf_t t; memset(&opt, 0, sizeof(opt)); opt.index = d->tcf_index; opt.refcnt = refcount_read(&d->tcf_refcnt) - ref; opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind; spin_lock_bh(&d->tcf_lock); opt.action = d->tcf_action; p = rcu_dereference_protected(d->skbmod_p, lockdep_is_held(&d->tcf_lock)); opt.flags = p->flags; if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if ((p->flags & SKBMOD_F_DMAC) && nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst)) goto nla_put_failure; if ((p->flags & SKBMOD_F_SMAC) && nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src)) goto nla_put_failure; if ((p->flags & SKBMOD_F_ETYPE) && nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type))) goto nla_put_failure; tcf_tm_dump(&t, &d->tcf_tm); if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD)) goto nla_put_failure; spin_unlock_bh(&d->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&d->tcf_lock); nlmsg_trim(skb, b); return -1; } static struct tc_action_ops act_skbmod_ops = { .kind = "skbmod", .id = TCA_ACT_SKBMOD, .owner = THIS_MODULE, .act = tcf_skbmod_act, .dump = tcf_skbmod_dump, .init = tcf_skbmod_init, .cleanup = tcf_skbmod_cleanup, .size = sizeof(struct tcf_skbmod), }; MODULE_ALIAS_NET_ACT("skbmod"); static __net_init int skbmod_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id); return tc_action_net_init(net, tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_skbmod_ops.net_id); } static struct pernet_operations skbmod_net_ops = { .init = skbmod_init_net, .exit_batch = skbmod_exit_net, .id = &act_skbmod_ops.net_id, .size = sizeof(struct tc_action_net), }; MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>"); MODULE_DESCRIPTION("SKB data mod-ing"); MODULE_LICENSE("GPL"); static int __init skbmod_init_module(void) { return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops); } static void __exit skbmod_cleanup_module(void) { tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops); } module_init(skbmod_init_module); module_exit(skbmod_cleanup_module);
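A hedged sketch of the flag resolution in tcf_skbmod_init() above: TCA_SKBMOD_DMAC/SMAC/ETYPE accumulate bits, but SKBMOD_F_SWAPMAC or SKBMOD_F_ECN in parm->flags replaces lflags outright, which is why tcf_skbmod_act() can assume the three mutually exclusive cases listed in its comment. The SKBMOD_F_* values here are local stand-ins for the uapi bits so the snippet is self-contained:

/* Sketch: how tcf_skbmod_init() resolves lflags (user-space rendering). */
#include <stdio.h>

#define SKBMOD_F_DMAC		0x1
#define SKBMOD_F_SMAC		0x2
#define SKBMOD_F_ETYPE		0x4
#define SKBMOD_F_SWAPMAC	0x8
#define SKBMOD_F_ECN		0x10

static unsigned int resolve_lflags(int has_dmac, int has_smac, int has_etype,
				   unsigned int parm_flags)
{
	unsigned int lflags = 0;

	if (has_dmac)
		lflags |= SKBMOD_F_DMAC;
	if (has_smac)
		lflags |= SKBMOD_F_SMAC;
	if (has_etype)
		lflags |= SKBMOD_F_ETYPE;
	if (parm_flags & SKBMOD_F_SWAPMAC)
		lflags = SKBMOD_F_SWAPMAC;	/* replaces, does not OR */
	if (parm_flags & SKBMOD_F_ECN)
		lflags = SKBMOD_F_ECN;		/* replaces, does not OR */
	return lflags;
}

int main(void)
{
	printf("dmac+smac           -> 0x%x\n", resolve_lflags(1, 1, 0, 0));
	printf("dmac+smac + swapmac -> 0x%x\n",
	       resolve_lflags(1, 1, 0, SKBMOD_F_SWAPMAC));
	return 0;
}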
722 29 113 2 1071 1 2095 2094 1071 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_LWTUNNEL_H #define __NET_LWTUNNEL_H 1 #include <linux/lwtunnel.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/types.h> #include <net/route.h> #define LWTUNNEL_HASH_BITS 7 #define LWTUNNEL_HASH_SIZE (1 << LWTUNNEL_HASH_BITS) /* lw tunnel state flags */ #define LWTUNNEL_STATE_OUTPUT_REDIRECT BIT(0) #define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1) #define LWTUNNEL_STATE_XMIT_REDIRECT BIT(2) /* LWTUNNEL_XMIT_CONTINUE should be distinguishable from dst_output return * values (NET_XMIT_xxx and NETDEV_TX_xxx in linux/netdevice.h) for safety. */ enum { LWTUNNEL_XMIT_DONE, LWTUNNEL_XMIT_CONTINUE = 0x100, }; struct lwtunnel_state { __u16 type; __u16 flags; __u16 headroom; atomic_t refcnt; int (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*orig_input)(struct sk_buff *); struct rcu_head rcu; __u8 data[]; }; struct lwtunnel_encap_ops { int (*build_state)(struct net *net, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack); void (*destroy_state)(struct lwtunnel_state *lws); int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*input)(struct sk_buff *skb); int (*fill_encap)(struct sk_buff *skb, struct lwtunnel_state *lwtstate); int (*get_encap_size)(struct lwtunnel_state *lwtstate); int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); int (*xmit)(struct sk_buff *skb); struct module *owner; }; #ifdef CONFIG_LWTUNNEL DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); void lwtstate_free(struct lwtunnel_state *lws); static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) { if (lws) atomic_inc(&lws->refcnt); return lws; } static inline void lwtstate_put(struct lwtunnel_state *lws) { if (!lws) return; if (atomic_dec_and_test(&lws->refcnt)) lwtstate_free(lws); } static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) { if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT)) return true; return false; } static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) { if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_INPUT_REDIRECT)) return true; return false; } static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) { if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_XMIT_REDIRECT)) return true; return false; } static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, unsigned int mtu) { if 
((lwtunnel_xmit_redirect(lwtstate) || lwtunnel_output_redirect(lwtstate)) && lwtstate->headroom < mtu) return lwtstate->headroom; return 0; } int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack); int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, struct netlink_ext_ack *extack); int lwtunnel_build_state(struct net *net, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **lws, struct netlink_ext_ack *extack); int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, int encap_attr, int encap_type_attr); int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate); struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb); int lwtunnel_input(struct sk_buff *skb); int lwtunnel_xmit(struct sk_buff *skb); int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress); static inline void lwtunnel_set_redirect(struct dst_entry *dst) { if (lwtunnel_output_redirect(dst->lwtstate)) { dst->lwtstate->orig_output = dst->output; dst->output = lwtunnel_output; } if (lwtunnel_input_redirect(dst->lwtstate)) { dst->lwtstate->orig_input = dst->input; dst->input = lwtunnel_input; } } #else static inline void lwtstate_free(struct lwtunnel_state *lws) { } static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) { return lws; } static inline void lwtstate_put(struct lwtunnel_state *lws) { } static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) { return false; } static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) { return false; } static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) { return false; } static inline void lwtunnel_set_redirect(struct dst_entry *dst) { } static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, unsigned int mtu) { return 0; } static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num) { return -EOPNOTSUPP; } static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num) { return -EOPNOTSUPP; } static inline int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) { NL_SET_ERR_MSG(extack, "CONFIG_LWTUNNEL is not enabled in this kernel"); return -EOPNOTSUPP; } static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, struct netlink_ext_ack *extack) { /* return 0 since we are not walking attr looking for * RTA_ENCAP_TYPE attribute on nexthops. 
*/ return 0; } static inline int lwtunnel_build_state(struct net *net, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **lws, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, int encap_attr, int encap_type_attr) { return 0; } static inline int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) { return 0; } static inline struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len) { return NULL; } static inline int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) { return 0; } static inline int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) { return -EOPNOTSUPP; } static inline int lwtunnel_input(struct sk_buff *skb) { return -EOPNOTSUPP; } static inline int lwtunnel_xmit(struct sk_buff *skb) { return -EOPNOTSUPP; } #endif /* CONFIG_LWTUNNEL */ #define MODULE_ALIAS_RTNL_LWT(encap_type) MODULE_ALIAS("rtnl-lwt-" __stringify(encap_type)) #endif /* __NET_LWTUNNEL_H */
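A hedged sketch (not from the tree) of what a minimal user of the lwtunnel_encap_ops interface above looks like. The "demo" names are hypothetical, and the sketch registers under LWTUNNEL_ENCAP_IP purely to have a concrete type number; a real encap type would use its own LWTUNNEL_ENCAP_* value:

/* Sketch: registering a lightweight-tunnel encap ops structure. */
#include <linux/module.h>
#include <linux/errno.h>
#include <net/netlink.h>
#include <net/lwtunnel.h>

static int demo_build_state(struct net *net, struct nlattr *encap,
			    unsigned int family, const void *cfg,
			    struct lwtunnel_state **ts,
			    struct netlink_ext_ack *extack)
{
	struct lwtunnel_state *lws;

	/* lwtunnel_state_alloc() reserves hdr_len bytes in lws->data[] */
	lws = lwtunnel_state_alloc(0);
	if (!lws)
		return -ENOMEM;

	lws->type = LWTUNNEL_ENCAP_IP;		/* hypothetical choice for the sketch */
	lws->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
	*ts = lws;
	return 0;
}

static int demo_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	/* ... encapsulate skb here, then hand back to the original path ... */
	return skb_dst(skb)->lwtstate->orig_output(net, sk, skb);
}

static const struct lwtunnel_encap_ops demo_encap_ops = {
	.build_state	= demo_build_state,
	.output		= demo_output,
	.owner		= THIS_MODULE,
};

static int __init demo_init(void)
{
	return lwtunnel_encap_add_ops(&demo_encap_ops, LWTUNNEL_ENCAP_IP);
}

static void __exit demo_exit(void)
{
	lwtunnel_encap_del_ops(&demo_encap_ops, LWTUNNEL_ENCAP_IP);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");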
10 92 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_TABLES_IPV6_H_ #define _NF_TABLES_IPV6_H_ #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/ipv6.h> #include <net/netfilter/nf_tables.h> static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt) { unsigned int flags = IP6_FH_F_AUTH; int protohdr, thoff = 0; unsigned short frag_off; protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0 || thoff > U16_MAX) { nft_set_pktinfo_unspec(pkt); return; } pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; pkt->thoff = thoff; pkt->fragoff = frag_off; } static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) { #if IS_ENABLED(CONFIG_IPV6) unsigned int flags = IP6_FH_F_AUTH; struct ipv6hdr *ip6h, _ip6h; unsigned int thoff = 0; unsigned short frag_off; u32 pkt_len, skb_len; int protohdr; ip6h = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb), sizeof(*ip6h), &_ip6h); if (!ip6h) return -1; if (ip6h->version != 6) return -1; pkt_len = ntohs(ip6h->payload_len); skb_len = pkt->skb->len - skb_network_offset(pkt->skb); if (pkt_len + sizeof(*ip6h) > skb_len) return -1; protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0 || thoff > U16_MAX) return -1; pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; pkt->thoff = thoff; pkt->fragoff = frag_off; return 0; #else return -1; #endif } static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) { if (__nft_set_pktinfo_ipv6_validate(pkt) < 0) nft_set_pktinfo_unspec(pkt); } static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt) { #if IS_ENABLED(CONFIG_IPV6) unsigned int flags = IP6_FH_F_AUTH; unsigned short frag_off; unsigned int thoff = 0; struct inet6_dev *idev; struct ipv6hdr *ip6h; int protohdr; u32 pkt_len; if (!pskb_may_pull(pkt->skb, sizeof(*ip6h))) return -1; ip6h = ipv6_hdr(pkt->skb); if (ip6h->version != 6) goto inhdr_error; pkt_len = ntohs(ip6h->payload_len); if (pkt_len + sizeof(*ip6h) > pkt->skb->len) { idev = __in6_dev_get(nft_in(pkt)); __IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INTRUNCATEDPKTS); return -1; } protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0 || thoff > U16_MAX) goto inhdr_error; pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; pkt->thoff = thoff; pkt->fragoff = frag_off; return 0; inhdr_error: idev = __in6_dev_get(nft_in(pkt)); __IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INHDRERRORS); return -1; #else return -1; #endif } #endif
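A hedged user-space rendering of the sanity checks that __nft_set_pktinfo_ipv6_validate() performs above: the version nibble must be 6, and the payload_len advertised in the IPv6 header plus the fixed header must fit within the bytes actually present. struct ipv6hdr is mocked minimally here so the snippet is self-contained:

/* Sketch: IPv6 header sanity checks mirroring the validation above. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

struct ipv6hdr_min {
	uint8_t		version_tc;	/* top 4 bits: version */
	uint8_t		tc_flow[3];
	uint16_t	payload_len;	/* network byte order */
	uint8_t		nexthdr;
	uint8_t		hop_limit;
	uint8_t		addrs[32];	/* saddr + daddr */
};

static int ipv6_pkt_sane(const uint8_t *pkt, size_t len)
{
	struct ipv6hdr_min h;

	if (len < sizeof(h))
		return 0;
	memcpy(&h, pkt, sizeof(h));
	if ((h.version_tc >> 4) != 6)
		return 0;			/* ip6h->version != 6 */
	if ((size_t)ntohs(h.payload_len) + sizeof(h) > len)
		return 0;			/* truncated packet */
	return 1;
}

int main(void)
{
	uint8_t pkt[60] = { 0x60 };		/* version 6, payload_len 0 */

	printf("sane: %d\n", ipv6_pkt_sane(pkt, sizeof(pkt)));
	return 0;
}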
2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 
3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 
4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 
4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 
5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 
6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 
// SPDX-License-Identifier: GPL-2.0-or-later /* * Routines having to do with the 'struct sk_buff' memory handlers. * * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * * Fixes: * Alan Cox : Fixed the worst of the load * balancer bugs. * Dave Platt : Interrupt stacking fix. * Richard Kooijman : Timestamp fixes. * Alan Cox : Changed buffer format. * Alan Cox : destructor hook for AF_UNIX etc. * Linus Torvalds : Better skb_clone. * Alan Cox : Added skb_copy. * Alan Cox : Added all the changed routines Linus * only put in the headers * Ray VanTassle : Fixed --skb->lock in free * Alan Cox : skb_copy copy arp field * Andi Kleen : slabified it. * Robert Olsson : Removed skb_head_pool * * NOTE: * The __skb_ routines should be called with interrupts * disabled, or you better be *real* sure that the operation is atomic * with respect to whatever list is being frobbed (e.g. via lock_sock() * or via disabling bottom half handlers, etc). */
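As a rough sketch of the locking rule in the note above (helper names are made up, not part of skbuff.c): the double-underscore queue helpers leave serialization to the caller, while the plain variants take the queue lock themselves.

#include <linux/skbuff.h>

/* Illustrative helper: skb_queue_tail() takes list->lock with IRQs disabled
 * internally, so it is safe from most contexts. */
static void example_enqueue(struct sk_buff_head *list, struct sk_buff *skb)
{
        skb_queue_tail(list, skb);
}

/* Illustrative helper: the lock-free __skb_queue_tail() is only safe when the
 * caller already holds list->lock (or otherwise guarantees exclusivity). */
static void example_enqueue_locked(struct sk_buff_head *list, struct sk_buff *skb)
{
        unsigned long flags;

        spin_lock_irqsave(&list->lock, flags);
        __skb_queue_tail(list, skb);
        spin_unlock_irqrestore(&list->lock, flags);
}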
/* * The functions in this file will not compile correctly with gcc 2.4.x */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/sctp.h> #include <linux/netdevice.h> #ifdef CONFIG_NET_CLS_ACT #include <net/pkt_sched.h> #endif #include <linux/string.h> #include <linux/skbuff.h> #include <linux/skbuff_ref.h> #include <linux/splice.h> #include <linux/cache.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/scatterlist.h> #include <linux/errqueue.h> #include <linux/prefetch.h> #include <linux/bitfield.h> #include <linux/if_vlan.h> #include <linux/mpls.h> #include <linux/kcov.h> #include <linux/iov_iter.h> #include <linux/crc32.h> #include <net/protocol.h> #include <net/dst.h> #include <net/sock.h> #include <net/checksum.h> #include <net/gro.h> #include <net/gso.h> #include <net/hotdata.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/mpls.h> #include <net/mptcp.h> #include <net/mctp.h> #include <net/page_pool/helpers.h> #include <net/dropreason.h> #include <linux/uaccess.h> #include <trace/events/skb.h> #include <linux/highmem.h> #include <linux/capability.h> #include <linux/user_namespace.h> #include <linux/indirect_call_wrapper.h> #include <linux/textsearch.h> #include "dev.h" #include "devmem.h" #include "netmem_priv.h" #include "sock_destructor.h" #ifdef CONFIG_SKB_EXTENSIONS static struct kmem_cache *skbuff_ext_cache __ro_after_init; #endif #define GRO_MAX_HEAD_PAD (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN) #define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \ GRO_MAX_HEAD_PAD)) /* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique * size, and we can differentiate heads from skb_small_head_cache * vs system slabs by looking at their size (skb_end_offset()). */ #define SKB_SMALL_HEAD_CACHE_SIZE \ (is_power_of_2(SKB_SMALL_HEAD_SIZE) ? \ (SKB_SMALL_HEAD_SIZE + L1_CACHE_BYTES) : \ SKB_SMALL_HEAD_SIZE) #define SKB_SMALL_HEAD_HEADROOM \ SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) /* kcm_write_msgs() relies on casting paged frags to bio_vec to use * iov_iter_bvec(). These static asserts ensure the cast is valid as long as the * netmem is a page.
*/ static_assert(offsetof(struct bio_vec, bv_page) == offsetof(skb_frag_t, netmem)); static_assert(sizeof_field(struct bio_vec, bv_page) == sizeof_field(skb_frag_t, netmem)); static_assert(offsetof(struct bio_vec, bv_len) == offsetof(skb_frag_t, len)); static_assert(sizeof_field(struct bio_vec, bv_len) == sizeof_field(skb_frag_t, len)); static_assert(offsetof(struct bio_vec, bv_offset) == offsetof(skb_frag_t, offset)); static_assert(sizeof_field(struct bio_vec, bv_offset) == sizeof_field(skb_frag_t, offset)); #undef FN #define FN(reason) [SKB_DROP_REASON_##reason] = #reason, static const char * const drop_reasons[] = { [SKB_CONSUMED] = "CONSUMED", DEFINE_DROP_REASON(FN, FN) }; static const struct drop_reason_list drop_reasons_core = { .reasons = drop_reasons, .n_reasons = ARRAY_SIZE(drop_reasons), }; const struct drop_reason_list __rcu * drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_NUM] = { [SKB_DROP_REASON_SUBSYS_CORE] = RCU_INITIALIZER(&drop_reasons_core), }; EXPORT_SYMBOL(drop_reasons_by_subsys); /** * drop_reasons_register_subsys - register another drop reason subsystem * @subsys: the subsystem to register, must not be the core * @list: the list of drop reasons within the subsystem, must point to * a statically initialized list */ void drop_reasons_register_subsys(enum skb_drop_reason_subsys subsys, const struct drop_reason_list *list) { if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE || subsys >= ARRAY_SIZE(drop_reasons_by_subsys), "invalid subsystem %d\n", subsys)) return; /* must point to statically allocated memory, so INIT is OK */ RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], list); } EXPORT_SYMBOL_GPL(drop_reasons_register_subsys); /** * drop_reasons_unregister_subsys - unregister a drop reason subsystem * @subsys: the subsystem to remove, must not be the core * * Note: This will synchronize_rcu() to ensure no users when it returns. */ void drop_reasons_unregister_subsys(enum skb_drop_reason_subsys subsys) { if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE || subsys >= ARRAY_SIZE(drop_reasons_by_subsys), "invalid subsystem %d\n", subsys)) return; RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], NULL); synchronize_rcu(); } EXPORT_SYMBOL_GPL(drop_reasons_unregister_subsys); /** * skb_panic - private function for out-of-line support * @skb: buffer * @sz: size * @addr: address * @msg: skb_over_panic or skb_under_panic * * Out-of-line support for skb_put() and skb_push(). * Called via the wrapper skb_over_panic() or skb_under_panic(). * Keep out of line to prevent kernel bloat. * __builtin_return_address is not used because it is not always reliable. */ static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, const char msg[]) { pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n", msg, addr, skb->len, sz, skb->head, skb->data, (unsigned long)skb->tail, (unsigned long)skb->end, skb->dev ? 
skb->dev->name : "<NULL>"); BUG(); } static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr) { skb_panic(skb, sz, addr, __func__); } static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) { skb_panic(skb, sz, addr, __func__); } #define NAPI_SKB_CACHE_SIZE 64 #define NAPI_SKB_CACHE_BULK 16 #define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2) struct napi_alloc_cache { local_lock_t bh_lock; struct page_frag_cache page; unsigned int skb_count; void *skb_cache[NAPI_SKB_CACHE_SIZE]; }; static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); void *data; fragsz = SKB_DATA_ALIGN(fragsz); local_lock_nested_bh(&napi_alloc_cache.bh_lock); data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC | __GFP_NOWARN, align_mask); local_unlock_nested_bh(&napi_alloc_cache.bh_lock); return data; } EXPORT_SYMBOL(__napi_alloc_frag_align); void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { void *data; if (in_hardirq() || irqs_disabled()) { struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); fragsz = SKB_DATA_ALIGN(fragsz); data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC | __GFP_NOWARN, align_mask); } else { local_bh_disable(); data = __napi_alloc_frag_align(fragsz, align_mask); local_bh_enable(); } return data; } EXPORT_SYMBOL(__netdev_alloc_frag_align); static struct sk_buff *napi_skb_cache_get(void) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; local_lock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!nc->skb_count)) { nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, GFP_ATOMIC | __GFP_NOWARN, NAPI_SKB_CACHE_BULK, nc->skb_cache); if (unlikely(!nc->skb_count)) { local_unlock_nested_bh(&napi_alloc_cache.bh_lock); return NULL; } } skb = nc->skb_cache[--nc->skb_count]; local_unlock_nested_bh(&napi_alloc_cache.bh_lock); kasan_mempool_unpoison_object(skb, kmem_cache_size(net_hotdata.skbuff_cache)); return skb; } /** * napi_skb_cache_get_bulk - obtain a number of zeroed skb heads from the cache * @skbs: pointer to an at least @n-sized array to fill with skb pointers * @n: number of entries to provide * * Tries to obtain @n &sk_buff entries from the NAPI percpu cache and writes * the pointers into the provided array @skbs. If there are less entries * available, tries to replenish the cache and bulk-allocates the diff from * the MM layer if needed. * The heads are being zeroed with either memset() or %__GFP_ZERO, so they are * ready for {,__}build_skb_around() and don't have any data buffers attached. * Must be called *only* from the BH context. * * Return: number of successfully allocated skbs (@n if no actual allocation * needed or kmem_cache_alloc_bulk() didn't fail). */ u32 napi_skb_cache_get_bulk(void **skbs, u32 n) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); u32 bulk, total = n; local_lock_nested_bh(&napi_alloc_cache.bh_lock); if (nc->skb_count >= n) goto get; /* No enough cached skbs. Try refilling the cache first */ bulk = min(NAPI_SKB_CACHE_SIZE - nc->skb_count, NAPI_SKB_CACHE_BULK); nc->skb_count += kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, GFP_ATOMIC | __GFP_NOWARN, bulk, &nc->skb_cache[nc->skb_count]); if (likely(nc->skb_count >= n)) goto get; /* Still not enough. 
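A rough usage sketch for napi_skb_cache_get_bulk() (the helper name, batch size and buffer size are assumptions, and error unwinding of leftover heads is omitted): a BH-context consumer pulls a burst of zeroed skb heads and finishes each one with build_skb_around() over a freshly allocated page fragment.

#include <linux/skbuff.h>

/* Illustrative BH-context consumer of the NAPI skb-head cache. */
static int example_rx_burst(struct sk_buff **out, u32 max)
{
        unsigned int frag_size = SKB_DATA_ALIGN(1536) +
                                 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
        void *heads[8];
        u32 i, got;

        max = min_t(u32, max, ARRAY_SIZE(heads));
        got = napi_skb_cache_get_bulk(heads, max);

        for (i = 0; i < got; i++) {
                void *data = napi_alloc_frag(frag_size);

                if (!data)
                        break;
                /* heads come back zeroed, so build_skb_around() is enough */
                out[i] = build_skb_around(heads[i], data, frag_size);
        }
        return i;
}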
Bulk-allocate the missing part directly, zeroed */ n -= kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, GFP_ATOMIC | __GFP_ZERO | __GFP_NOWARN, n - nc->skb_count, &skbs[nc->skb_count]); if (likely(nc->skb_count >= n)) goto get; /* kmem_cache didn't allocate the number we need, limit the output */ total -= n - nc->skb_count; n = nc->skb_count; get: for (u32 base = nc->skb_count - n, i = 0; i < n; i++) { u32 cache_size = kmem_cache_size(net_hotdata.skbuff_cache); skbs[i] = nc->skb_cache[base + i]; kasan_mempool_unpoison_object(skbs[i], cache_size); memset(skbs[i], 0, offsetof(struct sk_buff, tail)); } nc->skb_count -= n; local_unlock_nested_bh(&napi_alloc_cache.bh_lock); return total; } EXPORT_SYMBOL_GPL(napi_skb_cache_get_bulk); static inline void __finalize_skb_around(struct sk_buff *skb, void *data, unsigned int size) { struct skb_shared_info *shinfo; size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /* Assumes caller memset cleared SKB */ skb->truesize = SKB_TRUESIZE(size); refcount_set(&skb->users, 1); skb->head = data; skb->data = data; skb_reset_tail_pointer(skb); skb_set_end_offset(skb, size); skb->mac_header = (typeof(skb->mac_header))~0U; skb->transport_header = (typeof(skb->transport_header))~0U; skb->alloc_cpu = raw_smp_processor_id(); /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); skb_set_kcov_handle(skb, kcov_common_handle()); } static inline void *__slab_build_skb(struct sk_buff *skb, void *data, unsigned int *size) { void *resized; /* Must find the allocation size (and grow it to match). */ *size = ksize(data); /* krealloc() will immediately return "data" when * "ksize(data)" is requested: it is the existing upper * bounds. As a result, GFP_ATOMIC will be ignored. Note * that this "new" pointer needs to be passed back to the * caller for use so the __alloc_size hinting will be * tracked correctly. */ resized = krealloc(data, *size, GFP_ATOMIC); WARN_ON_ONCE(resized != data); return resized; } /* build_skb() variant which can operate on slab buffers. * Note that this should be used sparingly as slab buffers * cannot be combined efficiently by GRO! */ struct sk_buff *slab_build_skb(void *data) { struct sk_buff *skb; unsigned int size; skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); data = __slab_build_skb(skb, data, &size); __finalize_skb_around(skb, data, size); return skb; } EXPORT_SYMBOL(slab_build_skb); /* Caller must provide SKB that is memset cleared */ static void __build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size) { unsigned int size = frag_size; /* frag_size == 0 is considered deprecated now. Callers * using slab buffer should use slab_build_skb() instead. */ if (WARN_ONCE(size == 0, "Use slab_build_skb() instead")) data = __slab_build_skb(skb, data, &size); __finalize_skb_around(skb, data, size); } /** * __build_skb - build a network buffer * @data: data buffer provided by caller * @frag_size: size of data (must not be 0) * * Allocate a new &sk_buff. Caller provides space holding head and * skb_shared_info. @data must have been allocated from the page * allocator or vmalloc(). (A @frag_size of 0 to indicate a kmalloc() * allocation is deprecated, and callers should use slab_build_skb() * instead.) * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. 
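A minimal usage sketch for slab_build_skb() (the caller and length handling are assumptions, not from the tree): wrap an existing kmalloc()'d buffer without copying; on failure the buffer remains owned by the caller.

#include <linux/slab.h>
#include <linux/skbuff.h>

/* Illustrative: wrap a kmalloc()'d buffer handed over by firmware.
 * The caller guarantees that len fits within the original allocation. */
static struct sk_buff *example_wrap_slab_buffer(void *buf, unsigned int len)
{
        struct sk_buff *skb = slab_build_skb(buf);

        if (!skb) {
                kfree(buf);             /* slab_build_skb() does not free buf */
                return NULL;
        }
        skb_put(skb, len);
        return skb;
}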
* Notes : * Before IO, driver allocates only data buffer where NIC put incoming frame * Driver should add room at head (NET_SKB_PAD) and * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info)) * After IO, driver calls build_skb(), to allocate sk_buff and populate it * before giving packet to stack. * RX rings only contains data buffers, not full skbs. */ struct sk_buff *__build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb; skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); __build_skb_around(skb, data, frag_size); return skb; } /* build_skb() is wrapper over __build_skb(), that specifically * takes care of skb->head and skb->pfmemalloc */ struct sk_buff *build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb = __build_skb(data, frag_size); if (likely(skb && frag_size)) { skb->head_frag = 1; skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } EXPORT_SYMBOL(build_skb); /** * build_skb_around - build a network buffer around provided skb * @skb: sk_buff provide by caller, must be memset cleared * @data: data buffer provided by caller * @frag_size: size of data */ struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size) { if (unlikely(!skb)) return NULL; __build_skb_around(skb, data, frag_size); if (frag_size) { skb->head_frag = 1; skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } EXPORT_SYMBOL(build_skb_around); /** * __napi_build_skb - build a network buffer * @data: data buffer provided by caller * @frag_size: size of data * * Version of __build_skb() that uses NAPI percpu caches to obtain * skbuff_head instead of inplace allocation. * * Returns a new &sk_buff on success, %NULL on allocation failure. */ static struct sk_buff *__napi_build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb; skb = napi_skb_cache_get(); if (unlikely(!skb)) return NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); __build_skb_around(skb, data, frag_size); return skb; } /** * napi_build_skb - build a network buffer * @data: data buffer provided by caller * @frag_size: size of data * * Version of __napi_build_skb() that takes care of skb->head_frag * and skb->pfmemalloc when the data is a page or page fragment. * * Returns a new &sk_buff on success, %NULL on allocation failure. */ struct sk_buff *napi_build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb = __napi_build_skb(data, frag_size); if (likely(skb) && frag_size) { skb->head_frag = 1; skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } EXPORT_SYMBOL(napi_build_skb); /* * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells * the caller if emergency pfmemalloc reserves are being used. If it is and * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves * may be used. 
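Tying together the driver notes above, a hedged sketch of a receive path whose buffer was sized with NET_SKB_PAD headroom plus skb_shared_info tailroom (the buffer length and helper name are assumptions):

#include <linux/skbuff.h>

#define EXAMPLE_RX_BUF_LEN 2048         /* assumed per-descriptor buffer size */

/* Illustrative: after DMA completes, wrap the page-allocator buffer with
 * build_skb(), then reserve the headroom that was sized in up front. */
static struct sk_buff *example_build_rx_skb(void *data, unsigned int pkt_len)
{
        unsigned int frag_size = SKB_DATA_ALIGN(NET_SKB_PAD + EXAMPLE_RX_BUF_LEN) +
                                 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
        struct sk_buff *skb = build_skb(data, frag_size);

        if (!skb)
                return NULL;

        skb_reserve(skb, NET_SKB_PAD);
        skb_put(skb, pkt_len);          /* NIC wrote pkt_len bytes */
        return skb;
}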
Otherwise, the packet data may be discarded until enough * memory is free */ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, bool *pfmemalloc) { bool ret_pfmemalloc = false; size_t obj_size; void *obj; obj_size = SKB_HEAD_ALIGN(*size); if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE && !(flags & KMALLOC_NOT_NORMAL_BITS)) { obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node); *size = SKB_SMALL_HEAD_CACHE_SIZE; if (obj || !(gfp_pfmemalloc_allowed(flags))) goto out; /* Try again but now we are using pfmemalloc reserves */ ret_pfmemalloc = true; obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags, node); goto out; } obj_size = kmalloc_size_roundup(obj_size); /* The following cast might truncate high-order bits of obj_size, this * is harmless because kmalloc(obj_size >= 2^32) will fail anyway. */ *size = (unsigned int)obj_size; /* * Try a regular allocation, when that fails and we're not entitled * to the reserves, fail. */ obj = kmalloc_node_track_caller(obj_size, flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node); if (obj || !(gfp_pfmemalloc_allowed(flags))) goto out; /* Try again but now we are using pfmemalloc reserves */ ret_pfmemalloc = true; obj = kmalloc_node_track_caller(obj_size, flags, node); out: if (pfmemalloc) *pfmemalloc = ret_pfmemalloc; return obj; } /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the * [BEEP] leaks. * */ /** * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache * instead of head cache and allocate a cloned (child) skb. * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for * allocations in case the data is required for writeback * @node: numa node to allocate memory on * * Allocate a new &sk_buff. The returned buffer has no headroom and a * tail room of at least size bytes. The object has a reference count * of one. The return is the buffer. On a failure the return is %NULL. * * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, int flags, int node) { struct kmem_cache *cache; struct sk_buff *skb; bool pfmemalloc; u8 *data; cache = (flags & SKB_ALLOC_FCLONE) ? net_hotdata.skbuff_fclone_cache : net_hotdata.skbuff_cache; if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) gfp_mask |= __GFP_MEMALLOC; /* Get the HEAD */ if ((flags & (SKB_ALLOC_FCLONE | SKB_ALLOC_NAPI)) == SKB_ALLOC_NAPI && likely(node == NUMA_NO_NODE || node == numa_mem_id())) skb = napi_skb_cache_get(); else skb = kmem_cache_alloc_node(cache, gfp_mask & ~GFP_DMA, node); if (unlikely(!skb)) return NULL; prefetchw(skb); /* We do our best to align skb_shared_info on a separate cache * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives * aligned memory blocks, unless SLUB/SLAB debug is enabled. * Both skb->head and skb_shared_info are cache line aligned. */ data = kmalloc_reserve(&size, gfp_mask, node, &pfmemalloc); if (unlikely(!data)) goto nodata; /* kmalloc_size_roundup() might give us more room than requested. * Put skb_shared_info exactly at the end of allocated zone, * to allow max possible filling before reallocation. */ prefetchw(data + SKB_WITH_OVERHEAD(size)); /* * Only clear those fields we need to clear, not those that we will * actually initialise below. 
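For comparison, a hedged sketch of the classic copy-based pattern served by __alloc_skb() through the alloc_skb() wrapper (the headroom size and helper name are assumptions):

#include <linux/skbuff.h>

/* Illustrative control-path sender: allocate, reserve headroom for lower
 * layers, then copy the payload into the linear area. */
static struct sk_buff *example_alloc_and_fill(const void *payload, unsigned int len)
{
        struct sk_buff *skb = alloc_skb(NET_SKB_PAD + len, GFP_ATOMIC);

        if (!skb)
                return NULL;

        skb_reserve(skb, NET_SKB_PAD);
        skb_put_data(skb, payload, len);
        return skb;
}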
Hence, don't put any more fields after * the tail pointer in struct sk_buff! */ memset(skb, 0, offsetof(struct sk_buff, tail)); __build_skb_around(skb, data, size); skb->pfmemalloc = pfmemalloc; if (flags & SKB_ALLOC_FCLONE) { struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); skb->fclone = SKB_FCLONE_ORIG; refcount_set(&fclones->fclone_ref, 1); } return skb; nodata: kmem_cache_free(cache, skb); return NULL; } EXPORT_SYMBOL(__alloc_skb); /** * __netdev_alloc_skb - allocate an skbuff for rx on a specific device * @dev: network device to receive on * @len: length to allocate * @gfp_mask: get_free_pages mask, passed to alloc_skb * * Allocate a new &sk_buff and assign it a usage count of one. The * buffer has NET_SKB_PAD headroom built in. Users should allocate * the headroom they think they need without accounting for the * built in space. The built in space is used for optimisations. * * %NULL is returned if there is no free memory. */ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, gfp_t gfp_mask) { struct page_frag_cache *nc; struct sk_buff *skb; bool pfmemalloc; void *data; len += NET_SKB_PAD; /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. */ if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) goto skb_fail; goto skb_success; } len = SKB_HEAD_ALIGN(len); if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; if (in_hardirq() || irqs_disabled()) { nc = this_cpu_ptr(&netdev_alloc_cache); data = page_frag_alloc(nc, len, gfp_mask); pfmemalloc = page_frag_cache_is_pfmemalloc(nc); } else { local_bh_disable(); local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc = this_cpu_ptr(&napi_alloc_cache.page); data = page_frag_alloc(nc, len, gfp_mask); pfmemalloc = page_frag_cache_is_pfmemalloc(nc); local_unlock_nested_bh(&napi_alloc_cache.bh_lock); local_bh_enable(); } if (unlikely(!data)) return NULL; skb = __build_skb(data, len); if (unlikely(!skb)) { skb_free_frag(data); return NULL; } if (pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; skb_success: skb_reserve(skb, NET_SKB_PAD); skb->dev = dev; skb_fail: return skb; } EXPORT_SYMBOL(__netdev_alloc_skb); /** * napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance * @napi: napi instance this buffer was allocated for * @len: length to allocate * * Allocate a new sk_buff for use in NAPI receive. This buffer will * attempt to allocate the head from a special reserved region used * only for NAPI Rx allocation. By doing this we can save several * CPU cycles by avoiding having to disable and re-enable IRQs. * * %NULL is returned if there is no free memory. */ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) { gfp_t gfp_mask = GFP_ATOMIC | __GFP_NOWARN; struct napi_alloc_cache *nc; struct sk_buff *skb; bool pfmemalloc; void *data; DEBUG_NET_WARN_ON_ONCE(!in_softirq()); len += NET_SKB_PAD + NET_IP_ALIGN; /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. 
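Since __netdev_alloc_skb() already reserves NET_SKB_PAD, a typical non-NAPI receive path (sketched here under assumed names) only needs to copy the frame in:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Illustrative non-NAPI RX: netdev_alloc_skb() wraps __netdev_alloc_skb()
 * with GFP_ATOMIC and returns with NET_SKB_PAD of headroom reserved. */
static struct sk_buff *example_netdev_rx(struct net_device *dev,
                                         const void *frame, unsigned int len)
{
        struct sk_buff *skb = netdev_alloc_skb(dev, len);

        if (!skb)
                return NULL;

        skb_put_data(skb, frame, len);
        return skb;
}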
*/ if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI, NUMA_NO_NODE); if (!skb) goto skb_fail; goto skb_success; } len = SKB_HEAD_ALIGN(len); if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc = this_cpu_ptr(&napi_alloc_cache); data = page_frag_alloc(&nc->page, len, gfp_mask); pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); local_unlock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!data)) return NULL; skb = __napi_build_skb(data, len); if (unlikely(!skb)) { skb_free_frag(data); return NULL; } if (pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; skb_success: skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); skb->dev = napi->dev; skb_fail: return skb; } EXPORT_SYMBOL(napi_alloc_skb); void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size, unsigned int truesize) { DEBUG_NET_WARN_ON_ONCE(size > truesize); skb_fill_netmem_desc(skb, i, netmem, off, size); skb->len += size; skb->data_len += size; skb->truesize += truesize; } EXPORT_SYMBOL(skb_add_rx_frag_netmem); void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; DEBUG_NET_WARN_ON_ONCE(size > truesize); skb_frag_size_add(frag, size); skb->len += size; skb->data_len += size; skb->truesize += truesize; } EXPORT_SYMBOL(skb_coalesce_rx_frag); static void skb_drop_list(struct sk_buff **listp) { kfree_skb_list(*listp); *listp = NULL; } static inline void skb_drop_fraglist(struct sk_buff *skb) { skb_drop_list(&skb_shinfo(skb)->frag_list); } static void skb_clone_fraglist(struct sk_buff *skb) { struct sk_buff *list; skb_walk_frags(skb, list) skb_get(list); } int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom) { #if IS_ENABLED(CONFIG_PAGE_POOL) u32 size, truesize, len, max_head_size, off; struct sk_buff *skb = *pskb, *nskb; int err, i, head_off; void *data; /* XDP does not support fraglist so we need to linearize * the skb. 
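skb_add_rx_frag_netmem() above (and the page-based skb_add_rx_frag() wrapper) accounts the added fragment into len, data_len and truesize; a minimal sketch with assumed sizes follows.

#include <linux/skbuff.h>

/* Illustrative: attach one received page as a fragment of an existing skb.
 * PAGE_SIZE is used as truesize because the whole page is consumed. */
static void example_attach_rx_page(struct sk_buff *skb, struct page *page,
                                   unsigned int offset, unsigned int size)
{
        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
                        size, PAGE_SIZE);
}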
*/ if (skb_has_frag_list(skb)) return -EOPNOTSUPP; max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - headroom); if (skb->len > max_head_size + MAX_SKB_FRAGS * PAGE_SIZE) return -ENOMEM; size = min_t(u32, skb->len, max_head_size); truesize = SKB_HEAD_ALIGN(size) + headroom; data = page_pool_dev_alloc_va(pool, &truesize); if (!data) return -ENOMEM; nskb = napi_build_skb(data, truesize); if (!nskb) { page_pool_free_va(pool, data, true); return -ENOMEM; } skb_reserve(nskb, headroom); skb_copy_header(nskb, skb); skb_mark_for_recycle(nskb); err = skb_copy_bits(skb, 0, nskb->data, size); if (err) { consume_skb(nskb); return err; } skb_put(nskb, size); head_off = skb_headroom(nskb) - skb_headroom(skb); skb_headers_offset_update(nskb, head_off); off = size; len = skb->len - off; for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) { struct page *page; u32 page_off; size = min_t(u32, len, PAGE_SIZE); truesize = size; page = page_pool_dev_alloc(pool, &page_off, &truesize); if (!page) { consume_skb(nskb); return -ENOMEM; } skb_add_rx_frag(nskb, i, page, page_off, size, truesize); err = skb_copy_bits(skb, off, page_address(page) + page_off, size); if (err) { consume_skb(nskb); return err; } len -= size; off += size; } consume_skb(skb); *pskb = nskb; return 0; #else return -EOPNOTSUPP; #endif } EXPORT_SYMBOL(skb_pp_cow_data); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, const struct bpf_prog *prog) { if (!prog->aux->xdp_has_frags) return -EINVAL; return skb_pp_cow_data(pool, pskb, XDP_PACKET_HEADROOM); } EXPORT_SYMBOL(skb_cow_data_for_xdp); #if IS_ENABLED(CONFIG_PAGE_POOL) bool napi_pp_put_page(netmem_ref netmem) { netmem = netmem_compound_head(netmem); if (unlikely(!netmem_is_pp(netmem))) return false; page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false); return true; } EXPORT_SYMBOL(napi_pp_put_page); #endif static bool skb_pp_recycle(struct sk_buff *skb, void *data) { if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) return false; return napi_pp_put_page(page_to_netmem(virt_to_page(data))); } /** * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb * @skb: page pool aware skb * * Increase the fragment reference count (pp_ref_count) of a skb. This is * intended to gain fragment references only for page pool aware skbs, * i.e. when skb->pp_recycle is true, and not for fragments in a * non-pp-recycling skb. It has a fallback to increase references on normal * pages, as page pool aware skbs may also have normal page fragments. 
*/ static int skb_pp_frag_ref(struct sk_buff *skb) { struct skb_shared_info *shinfo; netmem_ref head_netmem; int i; if (!skb->pp_recycle) return -EINVAL; shinfo = skb_shinfo(skb); for (i = 0; i < shinfo->nr_frags; i++) { head_netmem = netmem_compound_head(shinfo->frags[i].netmem); if (likely(netmem_is_pp(head_netmem))) page_pool_ref_netmem(head_netmem); else page_ref_inc(netmem_to_page(head_netmem)); } return 0; } static void skb_kfree_head(void *head, unsigned int end_offset) { if (end_offset == SKB_SMALL_HEAD_HEADROOM) kmem_cache_free(net_hotdata.skb_small_head_cache, head); else kfree(head); } static void skb_free_head(struct sk_buff *skb) { unsigned char *head = skb->head; if (skb->head_frag) { if (skb_pp_recycle(skb, head)) return; skb_free_frag(head); } else { skb_kfree_head(head, skb_end_offset(skb)); } } static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason) { struct skb_shared_info *shinfo = skb_shinfo(skb); int i; if (!skb_data_unref(skb, shinfo)) goto exit; if (skb_zcopy(skb)) { bool skip_unref = shinfo->flags & SKBFL_MANAGED_FRAG_REFS; skb_zcopy_clear(skb, true); if (skip_unref) goto free_head; } for (i = 0; i < shinfo->nr_frags; i++) __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle); free_head: if (shinfo->frag_list) kfree_skb_list_reason(shinfo->frag_list, reason); skb_free_head(skb); exit: /* When we clone an SKB we copy the reycling bit. The pp_recycle * bit is only set on the head though, so in order to avoid races * while trying to recycle fragments on __skb_frag_unref() we need * to make one SKB responsible for triggering the recycle path. * So disable the recycling bit if an SKB is cloned and we have * additional references to the fragmented part of the SKB. * Eventually the last SKB will have the recycling bit set and it's * dataref set to 0, which will trigger the recycling */ skb->pp_recycle = 0; } /* * Free an skbuff by memory without cleaning the state. */ static void kfree_skbmem(struct sk_buff *skb) { struct sk_buff_fclones *fclones; switch (skb->fclone) { case SKB_FCLONE_UNAVAILABLE: kmem_cache_free(net_hotdata.skbuff_cache, skb); return; case SKB_FCLONE_ORIG: fclones = container_of(skb, struct sk_buff_fclones, skb1); /* We usually free the clone (TX completion) before original skb * This test would have no chance to be true for the clone, * while here, branch prediction will be good. */ if (refcount_read(&fclones->fclone_ref) == 1) goto fastpath; break; default: /* SKB_FCLONE_CLONE */ fclones = container_of(skb, struct sk_buff_fclones, skb2); break; } if (!refcount_dec_and_test(&fclones->fclone_ref)) return; fastpath: kmem_cache_free(net_hotdata.skbuff_fclone_cache, fclones); } void skb_release_head_state(struct sk_buff *skb) { skb_dst_drop(skb); if (skb->destructor) { DEBUG_NET_WARN_ON_ONCE(in_hardirq()); skb->destructor(skb); } #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb_nfct(skb)); #endif skb_ext_put(skb); } /* Free everything but the sk_buff shell. */ static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason) { skb_release_head_state(skb); if (likely(skb->head)) skb_release_data(skb, reason); } /** * __kfree_skb - private function * @skb: buffer * * Free an sk_buff. Release anything attached to the buffer. * Clean the state. This is an internal helper function. 
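Since __kfree_skb() is an internal helper, here is a hedged sketch of the public entry points a completion handler would normally use instead (the helper name and drop reason are only examples):

#include <linux/skbuff.h>

/* Illustrative TX-completion handler: consume_skb() signals a successful
 * transmission, kfree_skb_reason() records why a frame was dropped. */
static void example_complete_tx(struct sk_buff *skb, bool ok)
{
        if (ok)
                consume_skb(skb);
        else
                kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED);
}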
/**
 * __kfree_skb - private function
 * @skb: buffer
 *
 * Free an sk_buff. Release anything attached to the buffer.
 * Clean the state. This is an internal helper function. Users should
 * always call kfree_skb().
 */
void __kfree_skb(struct sk_buff *skb)
{
	skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
	kfree_skbmem(skb);
}
EXPORT_SYMBOL(__kfree_skb);

static __always_inline
bool __sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb,
			  enum skb_drop_reason reason)
{
	if (unlikely(!skb_unref(skb)))
		return false;

	DEBUG_NET_WARN_ON_ONCE(reason == SKB_NOT_DROPPED_YET ||
			       u32_get_bits(reason,
					    SKB_DROP_REASON_SUBSYS_MASK) >=
			       SKB_DROP_REASON_SUBSYS_NUM);

	if (reason == SKB_CONSUMED)
		trace_consume_skb(skb, __builtin_return_address(0));
	else
		trace_kfree_skb(skb, __builtin_return_address(0), reason, sk);
	return true;
}

/**
 * sk_skb_reason_drop - free an sk_buff with special reason
 * @sk: the socket to receive @skb, or NULL if not applicable
 * @skb: buffer to free
 * @reason: reason why this skb is dropped
 *
 * Drop a reference to the buffer and free it if the usage count has hit
 * zero. Meanwhile, pass the receiving socket and drop reason to the
 * 'kfree_skb' tracepoint.
 */
void __fix_address
sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb,
		   enum skb_drop_reason reason)
{
	if (__sk_skb_reason_drop(sk, skb, reason))
		__kfree_skb(skb);
}
EXPORT_SYMBOL(sk_skb_reason_drop);

#define KFREE_SKB_BULK_SIZE	16

struct skb_free_array {
	unsigned int skb_count;
	void *skb_array[KFREE_SKB_BULK_SIZE];
};

static void kfree_skb_add_bulk(struct sk_buff *skb,
			       struct skb_free_array *sa,
			       enum skb_drop_reason reason)
{
	/* if SKB is a clone, don't handle this case */
	if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) {
		__kfree_skb(skb);
		return;
	}

	skb_release_all(skb, reason);
	sa->skb_array[sa->skb_count++] = skb;

	if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
		kmem_cache_free_bulk(net_hotdata.skbuff_cache,
				     KFREE_SKB_BULK_SIZE, sa->skb_array);
		sa->skb_count = 0;
	}
}

void __fix_address
kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason)
{
	struct skb_free_array sa;

	sa.skb_count = 0;

	while (segs) {
		struct sk_buff *next = segs->next;

		if (__sk_skb_reason_drop(NULL, segs, reason)) {
			skb_poison_list(segs);
			kfree_skb_add_bulk(segs, &sa, reason);
		}

		segs = next;
	}

	if (sa.skb_count)
		kmem_cache_free_bulk(net_hotdata.skbuff_cache, sa.skb_count,
				     sa.skb_array);
}
EXPORT_SYMBOL(kfree_skb_list_reason);
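/*
 * Illustrative sketch, not part of skbuff.c: freeing skbs with an explicit
 * drop reason so the kfree_skb tracepoint reports why they were dropped.
 * The helper names are hypothetical; SKB_DROP_REASON_NOMEM is just one
 * plausible reason for an allocation-failure error path.
 */
#include <linux/skbuff.h>

static void example_drop_with_reason(struct sk_buff *skb)
{
	/* Single skb: kfree_skb_reason() ends up in sk_skb_reason_drop(). */
	kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
}

static void example_drop_segment_list(struct sk_buff *segs)
{
	/* Whole ->next chain: shells are returned in bulk to skbuff_cache. */
	kfree_skb_list_reason(segs, SKB_DROP_REASON_NOMEM);
}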
/* Dump skb information and contents.
 *
 * Must only be called from net_ratelimit()-ed paths.
 *
 * Dumps whole packets if full_pkt, only headers otherwise.
 */
void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
{
	struct skb_shared_info *sh = skb_shinfo(skb);
	struct net_device *dev = skb->dev;
	struct sock *sk = skb->sk;
	struct sk_buff *list_skb;
	bool has_mac, has_trans;
	int headroom, tailroom;
	int i, len, seg_len;

	if (full_pkt)
		len = skb->len;
	else
		len = min_t(int, skb->len, MAX_HEADER + 128);

	headroom = skb_headroom(skb);
	tailroom = skb_tailroom(skb);

	has_mac = skb_mac_header_was_set(skb);
	has_trans = skb_transport_header_was_set(skb);

	printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
	       "mac=(%d,%d) mac_len=%u net=(%d,%d) trans=%d\n"
	       "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
	       "csum(0x%x start=%u offset=%u ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
	       "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n"
	       "priority=0x%x mark=0x%x alloc_cpu=%u vlan_all=0x%x\n"
	       "encapsulation=%d inner(proto=0x%04x, mac=%u, net=%u, trans=%u)\n",
	       level, skb->len, headroom, skb_headlen(skb), tailroom,
	       has_mac ? skb->mac_header : -1,
	       has_mac ? skb_mac_header_len(skb) : -1,
	       skb->mac_len,
	       skb->network_header,
	       has_trans ? skb_network_header_len(skb) : -1,
	       has_trans ? skb->transport_header : -1,
	       sh->tx_flags, sh->nr_frags,
	       sh->gso_size, sh->gso_type, sh->gso_segs,
	       skb->csum, skb->csum_start, skb->csum_offset, skb->ip_summed,
	       skb->csum_complete_sw, skb->csum_valid, skb->csum_level,
	       skb->hash, skb->sw_hash, skb->l4_hash,
	       ntohs(skb->protocol), skb->pkt_type, skb->skb_iif,
	       skb->priority, skb->mark, skb->alloc_cpu, skb->vlan_all,
	       skb->encapsulation, skb->inner_protocol, skb->inner_mac_header,
	       skb->inner_network_header, skb->inner_transport_header);

	if (dev)
		printk("%sdev name=%s feat=%pNF\n",
		       level, dev->name, &dev->features);
	if (sk)
		printk("%ssk family=%hu type=%u proto=%u\n",
		       level, sk->sk_family, sk->sk_type, sk->sk_protocol);

	if (full_pkt && headroom)
		print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET,
			       16, 1, skb->head, headroom, false);

	seg_len = min_t(int, skb_headlen(skb), len);
	if (seg_len)
		print_hex_dump(level, "skb linear: ", DUMP_PREFIX_OFFSET,
			       16, 1, skb->data, seg_len, false);
	len -= seg_len;

	if (full_pkt && tailroom)
		print_hex_dump(level, "skb tailroom: ", DUMP_PREFIX_OFFSET,
			       16, 1, skb_tail_pointer(skb), tailroom, false);

	for (i = 0; len && i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		u32 p_off, p_len, copied;
		struct page *p;
		u8 *vaddr;

		if (skb_frag_is_net_iov(frag)) {
			printk("%sskb frag %d: not readable\n", level, i);
			len -= skb_frag_size(frag);
			if (!len)
				break;
			continue;
		}

		skb_frag_foreach_page(frag, skb_frag_off(frag),
				      skb_frag_size(frag), p, p_off, p_len,
				      copied) {
			seg_len = min_t(int, p_len, len);
			vaddr = kmap_atomic(p);
			print_hex_dump(level, "skb frag: ", DUMP_PREFIX_OFFSET,
				       16, 1, vaddr + p_off, seg_len, false);
			kunmap_atomic(vaddr);
			len -= seg_len;
			if (!len)
				break;
		}
	}

	if (full_pkt && skb_has_frag_list(skb)) {
		printk("skb fraglist:\n");
		skb_walk_frags(skb, list_skb)
			skb_dump(level, list_skb, true);
	}
}
EXPORT_SYMBOL(skb_dump);

/**
 * skb_tx_error - report an sk_buff xmit error
 * @skb: buffer that triggered an error
 *
 * Report xmit error if a device callback is tracking this skb.
 * skb must be freed afterwards.
 */
void skb_tx_error(struct sk_buff *skb)
{
	if (skb) {
		skb_zcopy_downgrade_managed(skb);
		skb_zcopy_clear(skb, true);
	}
}
EXPORT_SYMBOL(skb_tx_error);

#ifdef CONFIG_TRACEPOINTS
/**
 * consume_skb - free an skbuff
 * @skb: buffer to free
 *
 * Drop a ref to the buffer and free it if the usage count has hit zero.
 * Functions identically to kfree_skb(), but kfree_skb() assumes that the
 * frame is being dropped after a failure and notes that.
 */
void consume_skb(struct sk_buff *skb)
{
	if (!skb_unref(skb))
		return;

	trace_consume_skb(skb, __builtin_return_address(0));
	__kfree_skb(skb);
}
EXPORT_SYMBOL(consume_skb);
#endif

/**
 * __consume_stateless_skb - free an skbuff, assuming it is stateless
 * @skb: buffer to free
 *
 * Like consume_skb(), but this variant assumes that this is the last
 * skb reference and that all the head states have already been dropped.
 */
void __consume_stateless_skb(struct sk_buff *skb)
{
	trace_consume_skb(skb, __builtin_return_address(0));
	skb_release_data(skb, SKB_CONSUMED);
	kfree_skbmem(skb);
}

static void napi_skb_cache_put(struct sk_buff *skb)
{
	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);